- 프론트엔드: Web Audio API 전처리 파이프라인 (GainNode + DynamicsCompressor + AnalyserNode)
- 프론트엔드: VU 미터 실시간 레벨 표시 + 마이크 감도 슬라이더 (0.5x~3.0x)
- 프론트엔드: getUserMedia constraints 강화 + MediaRecorder 128kbps Opus
- 백엔드: Google STT V2 API + Chirp 2 모델 batchRecognize 메서드 추가
- 백엔드: V2→V1 자동 폴백 래퍼 (speechToTextWithDiarizationAuto)
- 백엔드: Speech Adaptation 도메인 용어 힌트 (블라인드/스크린 등 22개)
- 백엔드: V2 SentencePiece 토큰 자동 감지 분기 처리
- 설정: config/services.php에 google.location 추가

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
990 lines
32 KiB
PHP
990 lines
32 KiB
PHP
<?php
|
|
|
|
namespace App\Services;
|
|
|
|
use Illuminate\Support\Facades\Http;
|
|
use Illuminate\Support\Facades\Log;
|
|
use Illuminate\Support\Facades\Storage;
|
|
|
|
/**
|
|
* Google Cloud 서비스 (Storage, Speech-to-Text)
|
|
*/
|
|
class GoogleCloudService
|
|
{
|
|
private ?array $serviceAccount = null;
|
|
|
|
private ?string $accessToken = null;
|
|
|
|
private ?int $tokenExpiry = null;
|
|
|
|
/**
 * Create the service and immediately attempt to load the service-account
 * credentials referenced by the application configuration.
 */
public function __construct()
{
    $this->loadServiceAccount();
}
|
|
|
|
/**
 * Load the service-account key file into $this->serviceAccount.
 *
 * The path is read from the `services.google.credentials_path` config entry.
 * The property is left null when the path is not configured or does not point
 * to a readable regular file; a file that cannot be decoded into a JSON
 * object/array is logged and also leaves the property null.
 */
private function loadServiceAccount(): void
{
    $path = config('services.google.credentials_path');

    // is_file() (unlike file_exists()) rejects directories, which would make
    // file_get_contents() fail during construction.
    if (! $path || ! is_file($path) || ! is_readable($path)) {
        return;
    }

    $raw = file_get_contents($path);
    $decoded = $raw === false ? null : json_decode($raw, true);

    // A scalar JSON document (e.g. a bare string) must not reach the
    // ?array typed property, and malformed JSON should be visible in the log.
    if (! is_array($decoded)) {
        Log::error('Google Cloud: 서비스 계정 파일 파싱 실패', ['path' => $path]);

        return;
    }

    $this->serviceAccount = $decoded;
}
|
|
|
|
/**
 * Obtain an OAuth 2.0 access token for the loaded service account.
 *
 * Builds an RS256-signed JWT assertion (scope `cloud-platform`, one hour
 * lifetime) and exchanges it at https://oauth2.googleapis.com/token using the
 * JWT-bearer grant. The token and its expiry are cached on the instance and
 * reused while more than 60 seconds of validity remain.
 *
 * @return string|null The access token, or null when no service account is
 *                     loaded, the key cannot be read, signing fails, the token
 *                     endpoint rejects the request, or an exception is thrown
 */
private function getAccessToken(): ?string
{
    // Reuse the cached token while it is still valid (60-second safety margin).
    if ($this->accessToken && $this->tokenExpiry && time() < $this->tokenExpiry - 60) {
        return $this->accessToken;
    }

    if (! $this->serviceAccount) {
        Log::error('Google Cloud: 서비스 계정 파일이 없습니다.');

        return null;
    }

    try {
        // JWT segments must be base64url-encoded without padding (RFC 7515);
        // plain base64 can emit '+', '/' and '=' which are not valid in a JWT.
        $base64Url = static fn (string $data): string => rtrim(strtr(base64_encode($data), '+/', '-_'), '=');

        $now = time();
        $jwtHeader = $base64Url(json_encode(['alg' => 'RS256', 'typ' => 'JWT']));
        $jwtClaim = $base64Url(json_encode([
            'iss' => $this->serviceAccount['client_email'] ?? null,
            'scope' => 'https://www.googleapis.com/auth/cloud-platform',
            'aud' => 'https://oauth2.googleapis.com/token',
            'exp' => $now + 3600,
            'iat' => $now,
        ]));

        $privateKey = openssl_pkey_get_private($this->serviceAccount['private_key'] ?? '');
        if (! $privateKey) {
            Log::error('Google Cloud: 개인 키 읽기 실패');

            return null;
        }

        $signingInput = $jwtHeader.'.'.$jwtClaim;
        $signature = '';

        // openssl_sign() reports failure through its return value, not an exception.
        if (! openssl_sign($signingInput, $signature, $privateKey, OPENSSL_ALGO_SHA256)) {
            Log::error('Google Cloud: JWT 서명 실패');

            return null;
        }

        $jwt = $signingInput.'.'.$base64Url($signature);

        $response = Http::asForm()->post('https://oauth2.googleapis.com/token', [
            'grant_type' => 'urn:ietf:params:oauth:grant-type:jwt-bearer',
            'assertion' => $jwt,
        ]);

        if ($response->successful()) {
            $data = $response->json();

            if (empty($data['access_token'])) {
                Log::error('Google Cloud: OAuth 응답에 access_token 없음', ['response' => $response->body()]);

                return null;
            }

            $this->accessToken = $data['access_token'];
            $this->tokenExpiry = $now + (int) ($data['expires_in'] ?? 3600);

            return $this->accessToken;
        }

        Log::error('Google Cloud: OAuth 토큰 발급 실패', ['response' => $response->body()]);

        return null;
    } catch (\Exception $e) {
        Log::error('Google Cloud: OAuth 토큰 발급 예외', ['error' => $e->getMessage()]);

        return null;
    }
}
|
|
|
|
/**
 * Upload a local file to the configured Cloud Storage bucket.
 *
 * Uses the JSON API simple upload (`uploadType=media`) against the bucket named
 * by the `services.google.storage_bucket` config entry. The whole file is read
 * into memory before it is sent.
 *
 * @param string $localPath  Path of the file to upload
 * @param string $objectName Destination object name inside the bucket
 * @return array|null ['uri' => 'gs://...', 'size' => bytes], or null when no
 *                    token/bucket is available, the file cannot be read, the
 *                    upload is rejected, or an exception is thrown
 */
public function uploadToStorage(string $localPath, string $objectName): ?array
{
    $token = $this->getAccessToken();
    if (! $token) {
        return null;
    }

    $bucket = config('services.google.storage_bucket');
    if (! $bucket) {
        Log::error('Google Cloud: Storage 버킷 설정 없음');

        return null;
    }

    try {
        $fileContent = is_file($localPath) && is_readable($localPath)
            ? file_get_contents($localPath)
            : false;

        // Never upload an empty body because the source file could not be read.
        if ($fileContent === false) {
            Log::error('Google Cloud: 업로드 파일 읽기 실패', ['path' => $localPath]);

            return null;
        }

        $fileSize = strlen($fileContent);
        $mimeType = mime_content_type($localPath) ?: 'audio/webm';

        // rawurlencode() percent-encodes spaces as %20; urlencode() would emit
        // '+', which is ambiguous in URLs.
        $uploadUrl = 'https://storage.googleapis.com/upload/storage/v1/b/'.
            rawurlencode($bucket).'/o?uploadType=media&name='.
            rawurlencode($objectName);

        // withBody() already sets the Content-Type header for the request.
        $response = Http::withToken($token)
            ->withBody($fileContent, $mimeType)
            ->post($uploadUrl);

        if ($response->successful()) {
            $result = $response->json();
            Log::info('Google Cloud: Storage 업로드 성공', [
                'object' => $objectName,
                'size' => $result['size'] ?? $fileSize,
                'bucket' => $bucket,
            ]);

            return [
                'uri' => 'gs://'.$bucket.'/'.$objectName,
                'size' => (int) ($result['size'] ?? $fileSize),
            ];
        }

        Log::error('Google Cloud: Storage 업로드 실패', ['response' => $response->body()]);

        return null;
    } catch (\Exception $e) {
        Log::error('Google Cloud: Storage 업로드 예외', ['error' => $e->getMessage()]);

        return null;
    }
}
|
|
|
|
/**
 * Decode base64 audio (optionally wrapped in a data URL) and upload it to GCS.
 *
 * The decoded bytes are written to a temporary `.webm` file under
 * `storage/app/temp`, uploaded through uploadToStorage(), and the temporary
 * file is removed afterwards regardless of the outcome.
 *
 * @param string $base64Audio Raw base64 text or a `data:...;base64,...` URL
 * @param string $objectName  Destination object name inside the bucket
 * @return array|null ['uri' => 'gs://...', 'size' => bytes], or null when the
 *                    payload cannot be decoded, the temporary file cannot be
 *                    written, or the upload fails
 */
public function uploadBase64Audio(string $base64Audio, string $objectName): ?array
{
    // Strip a data-URL header. Plain string functions are used instead of a
    // regex so that headers carrying extra parameters (for example
    // "data:audio/webm;codecs=opus;base64,") are accepted and multi-megabyte
    // payloads cannot hit PCRE backtracking limits.
    $audioData = $base64Audio;
    if (strncmp($base64Audio, 'data:', 5) === 0) {
        $commaPos = strpos($base64Audio, ',');
        if ($commaPos !== false) {
            $audioData = substr($base64Audio, $commaPos + 1);
        }
    }

    // Strict mode rejects characters outside the base64 alphabet instead of
    // silently producing corrupted audio.
    $binary = base64_decode($audioData, true);
    if ($binary === false || $binary === '') {
        Log::error('Google Cloud: Base64 오디오 디코딩 실패');

        return null;
    }

    $tempPath = storage_path('app/temp/'.uniqid('audio_', true).'.webm');
    $tempDir = dirname($tempPath);

    // The second is_dir() covers another process creating the directory first.
    if (! is_dir($tempDir) && ! mkdir($tempDir, 0755, true) && ! is_dir($tempDir)) {
        Log::error('Google Cloud: 임시 디렉터리 생성 실패', ['dir' => $tempDir]);

        return null;
    }

    try {
        if (file_put_contents($tempPath, $binary) === false) {
            Log::error('Google Cloud: 임시 파일 쓰기 실패', ['path' => $tempPath]);

            return null;
        }

        return $this->uploadToStorage($tempPath, $objectName);
    } finally {
        // Always remove the temporary file, even if the upload throws.
        if (is_file($tempPath)) {
            unlink($tempPath);
        }
    }
}
|
|
|
|
/**
 * Transcribe an audio object with the Speech-to-Text V1 long-running API.
 *
 * Submits a `speech:longrunningrecognize` request (WEBM_OPUS, 48 kHz,
 * `latest_long` model, automatic punctuation) and then waits for the returned
 * operation through waitForSttOperation().
 *
 * @param string $gcsUri       Audio location, sent as `audio.uri`
 * @param string $languageCode Language code sent as `config.languageCode`
 * @return string|null Transcript text, or null when no token is available, the
 *                     request fails, no operation name is returned, the wait
 *                     fails, or an exception is thrown
 */
public function speechToText(string $gcsUri, string $languageCode = 'ko-KR'): ?string
{
    $bearer = $this->getAccessToken();
    if (! $bearer) {
        return null;
    }

    $recognitionConfig = [
        'encoding' => 'WEBM_OPUS',
        'sampleRateHertz' => 48000,
        'languageCode' => $languageCode,
        'enableAutomaticPunctuation' => true,
        'model' => 'latest_long',
    ];
    $payload = [
        'config' => $recognitionConfig,
        'audio' => ['uri' => $gcsUri],
    ];

    try {
        // Long audio is handled asynchronously (LongRunningRecognize).
        $submitted = Http::withToken($bearer)
            ->post('https://speech.googleapis.com/v1/speech:longrunningrecognize', $payload);

        if (! $submitted->successful()) {
            Log::error('Google Cloud: STT 요청 실패', ['response' => $submitted->body()]);

            return null;
        }

        $operation = $submitted->json();
        $operationName = $operation['name'] ?? null;

        Log::info('Google Cloud: STT 요청 응답', ['operation' => $operation]);

        if (! $operationName) {
            Log::error('Google Cloud: STT 작업 이름 없음', ['response_body' => $submitted->body()]);

            return null;
        }

        // Block until the operation finishes (polling).
        $transcript = $this->waitForSttOperation($operationName);
        Log::info('Google Cloud: STT 완료', [
            'operationName' => $operationName,
            'result_length' => strlen($transcript ?? ''),
        ]);

        return $transcript;
    } catch (\Exception $failure) {
        Log::error('Google Cloud: STT 예외', ['error' => $failure->getMessage()]);

        return null;
    }
}
|
|
|
|
/**
 * Poll a Speech-to-Text V1 operation until it finishes and return its text.
 *
 * Sleeps 5 seconds before each poll and gives up after $maxAttempts polls.
 * The first alternative of every result is concatenated into the transcript.
 * Failed polls (HTTP error status or a thrown exception) are logged and
 * retried on the next attempt.
 *
 * @param string $operationName Operation name returned by longrunningrecognize
 * @param int    $maxAttempts   Maximum number of polls
 * @return string|null Concatenated transcript, or null when no token is
 *                     available, the operation reports an error, or the
 *                     attempts are exhausted
 */
private function waitForSttOperation(string $operationName, int $maxAttempts = 60): ?string
{
    for ($attempt = 1; $attempt <= $maxAttempts; $attempt++) {
        // Re-read the token on every attempt: getAccessToken() caches it, and a
        // cached token may have only about a minute left while this loop can
        // run for several minutes.
        $token = $this->getAccessToken();
        if (! $token) {
            Log::error('Google Cloud: STT 폴링 토큰 획득 실패');

            return null;
        }

        sleep(5); // wait 5 seconds between polls

        try {
            $response = Http::withToken($token)
                ->get("https://speech.googleapis.com/v1/operations/{$operationName}");
        } catch (\Exception $e) {
            // A transient network failure should not abort the whole wait.
            Log::warning('Google Cloud: STT 폴링 예외', ['error' => $e->getMessage(), 'attempt' => $attempt]);

            continue;
        }

        if (! $response->successful()) {
            Log::warning('Google Cloud: STT 폴링 요청 실패', ['status' => $response->status(), 'attempt' => $attempt]);

            continue;
        }

        $result = $response->json();

        if (empty($result['done'])) {
            continue;
        }

        if (isset($result['error'])) {
            Log::error('Google Cloud: STT 작업 실패', ['error' => $result['error']]);

            return null;
        }

        // Concatenate the top alternative of each result.
        $transcript = '';
        foreach ($result['response']['results'] ?? [] as $res) {
            $alternatives = $res['alternatives'] ?? [];
            if (! empty($alternatives)) {
                $transcript .= $alternatives[0]['transcript'] ?? '';
            }
        }

        return $transcript;
    }

    Log::error('Google Cloud: STT 작업 타임아웃');

    return null;
}
|
|
|
|
/**
 * Transcribe an audio object with Speech-to-Text V1 and speaker diarization.
 *
 * Sends a `speech:longrunningrecognize` request (WEBM_OPUS, 48 kHz,
 * `latest_long`, word time offsets, diarization enabled), waits for the
 * operation via waitForSttDiarizationOperation() and converts the raw result
 * with parseDiarizationResult().
 *
 * @param string $gcsUri       Audio location, sent as `audio.uri`
 * @param string $languageCode Language code sent as `config.languageCode`
 * @param int    $minSpeakers  Sent as `diarizationConfig.minSpeakerCount`
 * @param int    $maxSpeakers  Sent as `diarizationConfig.maxSpeakerCount`
 * @return array|null ['segments' => [...], 'full_transcript' => '...'] as built
 *                    by parseDiarizationResult(), or null on any failure
 */
public function speechToTextWithDiarization(
    string $gcsUri,
    string $languageCode = 'ko-KR',
    int $minSpeakers = 2,
    int $maxSpeakers = 6
): ?array {
    $bearer = $this->getAccessToken();
    if (! $bearer) {
        return null;
    }

    $diarization = [
        'enableSpeakerDiarization' => true,
        'minSpeakerCount' => $minSpeakers,
        'maxSpeakerCount' => $maxSpeakers,
    ];
    $payload = [
        'config' => [
            'encoding' => 'WEBM_OPUS',
            'sampleRateHertz' => 48000,
            'languageCode' => $languageCode,
            'enableAutomaticPunctuation' => true,
            'model' => 'latest_long',
            'enableWordTimeOffsets' => true,
            'diarizationConfig' => $diarization,
        ],
        'audio' => ['uri' => $gcsUri],
    ];

    try {
        $submitted = Http::withToken($bearer)
            ->post('https://speech.googleapis.com/v1/speech:longrunningrecognize', $payload);

        if (! $submitted->successful()) {
            Log::error('Google Cloud: STT Diarization 요청 실패', ['response' => $submitted->body()]);

            return null;
        }

        $operationName = $submitted->json()['name'] ?? null;
        if (! $operationName) {
            Log::error('Google Cloud: STT Diarization 작업 이름 없음');

            return null;
        }

        Log::info('Google Cloud: STT Diarization 요청 시작', ['operationName' => $operationName]);

        $finished = $this->waitForSttDiarizationOperation($operationName);

        return $finished ? $this->parseDiarizationResult($finished) : null;
    } catch (\Exception $failure) {
        Log::error('Google Cloud: STT Diarization 예외', ['error' => $failure->getMessage()]);

        return null;
    }
}
|
|
|
|
/**
 * Poll a Speech-to-Text V1 diarization operation and return its raw payload.
 *
 * Sleeps 5 seconds before each poll and gives up after $maxAttempts polls.
 * Failed polls (HTTP error status or a thrown exception) are logged and
 * retried on the next attempt.
 *
 * @param string $operationName Operation name returned by longrunningrecognize
 * @param int    $maxAttempts   Maximum number of polls
 * @return array|null The decoded operation document once `done` is true, or
 *                    null when no token is available, the operation reports an
 *                    error, or the attempts are exhausted
 */
private function waitForSttDiarizationOperation(string $operationName, int $maxAttempts = 60): ?array
{
    for ($attempt = 1; $attempt <= $maxAttempts; $attempt++) {
        // Re-read the token on every attempt: getAccessToken() caches it, and a
        // cached token may have only about a minute left while this loop can
        // run for several minutes.
        $token = $this->getAccessToken();
        if (! $token) {
            return null;
        }

        sleep(5);

        try {
            $response = Http::withToken($token)
                ->get("https://speech.googleapis.com/v1/operations/{$operationName}");
        } catch (\Exception $e) {
            // A transient network failure should not abort the whole wait.
            Log::warning('Google Cloud: STT Diarization 폴링 예외', [
                'error' => $e->getMessage(),
                'attempt' => $attempt,
            ]);

            continue;
        }

        if (! $response->successful()) {
            Log::warning('Google Cloud: STT Diarization 폴링 요청 실패', [
                'status' => $response->status(),
                'attempt' => $attempt,
            ]);

            continue;
        }

        $result = $response->json();

        if (empty($result['done'])) {
            continue;
        }

        if (isset($result['error'])) {
            Log::error('Google Cloud: STT Diarization 작업 실패', ['error' => $result['error']]);

            return null;
        }

        return $result;
    }

    Log::error('Google Cloud: STT Diarization 작업 타임아웃');

    return null;
}
|
|
|
|
/**
 * Convert a finished V1 diarization operation into per-speaker segments.
 *
 * Word-level entries are read from the first alternative of the LAST result
 * and grouped into a new segment every time the speaker tag changes. Word
 * texts are treated as SentencePiece-style tokens: a leading "▁" (U+2581) or
 * "_" marks the start of a new word and all such marker characters are
 * stripped from the text. When no word-level data is present, all transcripts
 * are merged into a single segment attributed to speaker 1.
 *
 * The first segment always starts at 0 ms; later segments start at the start
 * time of their first word. Each segment ends at the end time of its own last
 * kept word.
 *
 * @param array $operationResult Decoded operation document (`response.results`)
 * @return array|null ['segments' => list of segment arrays,
 *                    'full_transcript' => string, 'speaker_count' => int],
 *                    or null when the operation contains no results
 */
private function parseDiarizationResult(array $operationResult): ?array
{
    $results = $operationResult['response']['results'] ?? [];

    if (empty($results)) {
        return null;
    }

    // With diarization enabled, the last result's alternatives[0].words is
    // expected to hold the word-level information for the whole audio.
    $lastResult = end($results);
    $words = $lastResult['alternatives'][0]['words'] ?? [];

    if (empty($words)) {
        // No word-level data: fall back to the plain transcripts.
        $transcript = '';
        foreach ($results as $res) {
            $transcript .= ($res['alternatives'][0]['transcript'] ?? '') . ' ';
        }
        $transcript = $this->cleanSttText(trim($transcript));

        return [
            'segments' => [[
                'speaker_name' => '화자 1',
                'speaker_label' => '1',
                'text' => $transcript,
                'start_time_ms' => 0,
                'end_time_ms' => null,
                'is_manual_speaker' => false,
            ]],
            'full_transcript' => '[화자 1] ' . $transcript,
            'speaker_count' => 1,
        ];
    }

    $buildSegment = fn ($speaker, array $tokens, int $fromMs, int $toMs): array => [
        'speaker_name' => '화자 ' . $speaker,
        'speaker_label' => (string) $speaker,
        'text' => $this->joinSentencePieceTokens($tokens),
        'start_time_ms' => $fromMs,
        'end_time_ms' => $toMs,
        'is_manual_speaker' => false,
    ];

    $segments = [];
    $currentSpeaker = null;
    $currentTokens = [];
    $segmentStartMs = 0;
    $segmentEndMs = 0;

    foreach ($words as $word) {
        $speakerTag = $word['speakerTag'] ?? 0;
        $wordText = $word['word'] ?? '';
        $startMs = $this->parseGoogleTimeToMs($word['startTime'] ?? '0s');
        $endMs = $this->parseGoogleTimeToMs($word['endTime'] ?? '0s');

        // A leading "▁" (U+2581) or "_" marks the start of a new word.
        $isNewWord = preg_match('/^[\x{2581}_]/u', $wordText) === 1;

        // Strip every marker character; skip tokens that become empty.
        $cleanToken = (string) preg_replace('/[\x{2581}_]/u', '', $wordText);
        if (trim($cleanToken) === '') {
            continue;
        }

        // Speaker changed: close the running segment at the end time of its
        // own last word, not at the start of the next speaker's first word.
        if ($currentSpeaker !== null && $speakerTag !== $currentSpeaker && ! empty($currentTokens)) {
            $segments[] = $buildSegment($currentSpeaker, $currentTokens, $segmentStartMs, $segmentEndMs);
            $currentTokens = [];
            $segmentStartMs = $startMs;
        }

        $currentSpeaker = $speakerTag;
        $currentTokens[] = ['text' => $cleanToken, 'new_word' => $isNewWord];
        // Guard against a missing endTime (parsed as 0) moving the end backwards.
        $segmentEndMs = max($endMs, $startMs);
    }

    // Flush the final segment.
    if (! empty($currentTokens)) {
        $segments[] = $buildSegment($currentSpeaker, $currentTokens, $segmentStartMs, $segmentEndMs);
    }

    $fullTranscript = '';
    foreach ($segments as $seg) {
        $fullTranscript .= "[{$seg['speaker_name']}] {$seg['text']}\n";
    }

    // Number of distinct speakers across all segments.
    $speakerCount = count(array_unique(array_column($segments, 'speaker_label')));

    return [
        'segments' => $segments,
        'full_transcript' => trim($fullTranscript),
        'speaker_count' => $speakerCount,
    ];
}
|
|
|
|
/**
 * Convert a duration string such as "1.500s" into whole milliseconds.
 *
 * @param string $timeStr Digits and dots followed by a literal "s"
 * @return int Rounded millisecond value, or 0 when the string does not match
 */
private function parseGoogleTimeToMs(string $timeStr): int
{
    $matched = preg_match('/^([\d.]+)s$/', $timeStr, $parts);

    if ($matched !== 1) {
        return 0;
    }

    $seconds = (float) $parts[1];

    return (int) round($seconds * 1000);
}
|
|
|
|
/**
 * Join cleaned tokens back into readable text.
 *
 * Each token is ['text' => string, 'new_word' => bool]. A token flagged as
 * `new_word` is preceded by a space (except the token at index 0); any other
 * token is glued directly onto the preceding text. Runs of whitespace are then
 * collapsed to one space and the result is trimmed.
 *
 * @param array $tokens List of token arrays, indexed from 0
 * @return string The joined text
 */
private function joinSentencePieceTokens(array $tokens): string
{
    $pieces = [];

    foreach ($tokens as $position => $token) {
        $needsSpace = $position !== 0 && $token['new_word'];
        $pieces[] = ($needsSpace ? ' ' : '') . $token['text'];
    }

    $joined = implode('', $pieces);

    return trim(preg_replace('/\s{2,}/', ' ', $joined));
}
|
|
|
|
/**
 * Remove SentencePiece/underscore noise from transcript text.
 *
 * Every "▁" (U+2581) becomes a space, every "_" is dropped, runs of whitespace
 * are collapsed to a single space and the result is trimmed.
 *
 * @param string $text Raw transcript text
 * @return string Cleaned text
 */
private function cleanSttText(string $text): string
{
    $withoutMarkers = str_replace(
        '_',
        '',
        preg_replace('/\x{2581}/u', ' ', $text)
    );

    $collapsed = preg_replace('/\s{2,}/', ' ', $withoutMarkers);

    return trim($collapsed);
}
|
|
|
|
/**
 * Transcribe an audio object with Speech-to-Text V2 `batchRecognize` (Chirp 2).
 *
 * Builds an inline-response batch request for the implicit recognizer `_`,
 * waits for the operation through waitForV2Operation() and converts the result
 * with parseV2Result(). The location comes from the `services.google.location`
 * config entry (default `us-central1`) and the project id from the loaded
 * service account.
 *
 * NOTE(review): the request asks for speaker diarization together with the
 * `chirp_2` model — confirm that this model supports diarization; if the API
 * rejects the combination this method will always return null.
 *
 * @param string $gcsUri       Audio location, sent in `files[].uri`
 * @param string $languageCode Language code sent in `config.language_codes`
 * @param int    $minSpeakers  Sent as `diarizationConfig.minSpeakerCount`
 * @param int    $maxSpeakers  Sent as `diarizationConfig.maxSpeakerCount`
 * @param array  $phraseHints  Phrases sent as an inline phrase set (boost 10.0)
 * @return array|null ['segments' => [...], 'full_transcript' => '...', 'speaker_count' => int],
 *                    or null when no token/project id is available or any step fails
 */
public function speechToTextV2(
    string $gcsUri,
    string $languageCode = 'ko-KR',
    int $minSpeakers = 2,
    int $maxSpeakers = 6,
    array $phraseHints = []
): ?array {
    $token = $this->getAccessToken();
    if (! $token) {
        return null;
    }

    $projectId = $this->serviceAccount['project_id'] ?? null;
    if (! $projectId) {
        Log::warning('Google Cloud: STT V2 - project_id 없음, V1 폴백 필요');

        return null;
    }

    // config() returns null when the key exists with a null value, so apply
    // the default explicitly.
    $location = config('services.google.location') ?: 'us-central1';

    // Resources in a regional location must be addressed through the matching
    // regional endpoint; only `global` uses the plain host.
    $host = $location === 'global'
        ? 'speech.googleapis.com'
        : "{$location}-speech.googleapis.com";

    try {
        // `configMask` is a field of the request, not of RecognitionConfig.
        // It is omitted: without a mask, every field given in `config`
        // overrides the recognizer defaults.
        $requestBody = [
            'config' => [
                'auto_decoding_config' => (object) [],
                'model' => 'chirp_2',
                'language_codes' => [$languageCode],
                'features' => [
                    'enableAutomaticPunctuation' => true,
                    'enableWordTimeOffsets' => true,
                    'diarizationConfig' => [
                        'minSpeakerCount' => $minSpeakers,
                        'maxSpeakerCount' => $maxSpeakers,
                    ],
                ],
            ],
            'files' => [
                ['uri' => $gcsUri],
            ],
            'recognitionOutputConfig' => [
                'inlineResponseConfig' => (object) [],
            ],
        ];

        // Speech Adaptation: phrase hints.
        if (! empty($phraseHints)) {
            // array_values() keeps the JSON a list even if the caller passed
            // an associative array.
            $phrases = array_map(
                static fn ($p) => ['value' => $p, 'boost' => 10.0],
                array_values($phraseHints)
            );
            $requestBody['config']['adaptation'] = [
                'phraseSets' => [
                    [
                        'inlinePhraseSet' => [
                            'phrases' => $phrases,
                        ],
                    ],
                ],
            ];
        }

        $url = "https://{$host}/v2/projects/{$projectId}/locations/{$location}/recognizers/_:batchRecognize";

        Log::info('Google Cloud: STT V2 (Chirp 2) 요청 시작', [
            'gcsUri' => $gcsUri,
            'model' => 'chirp_2',
            'language' => $languageCode,
            'phraseHints' => count($phraseHints),
        ]);

        $response = Http::withToken($token)
            ->timeout(30)
            ->post($url, $requestBody);

        if (! $response->successful()) {
            Log::error('Google Cloud: STT V2 요청 실패', [
                'status' => $response->status(),
                'response' => $response->body(),
            ]);

            return null;
        }

        $operation = $response->json();
        $operationName = $operation['name'] ?? null;

        if (! $operationName) {
            Log::error('Google Cloud: STT V2 작업 이름 없음');

            return null;
        }

        Log::info('Google Cloud: STT V2 작업 시작됨', ['operationName' => $operationName]);

        $operationResult = $this->waitForV2Operation($operationName);

        if (! $operationResult) {
            return null;
        }

        return $this->parseV2Result($operationResult, $gcsUri);
    } catch (\Exception $e) {
        Log::error('Google Cloud: STT V2 예외', ['error' => $e->getMessage()]);

        return null;
    }
}
|
|
|
|
/**
 * Poll a Speech-to-Text V2 operation until it finishes.
 *
 * Sleeps 5 seconds before each poll and gives up after $maxAttempts polls.
 * The API host is derived from the location segment of the operation name.
 * Failed polls (HTTP error status or a thrown exception) are logged and
 * retried on the next attempt.
 *
 * @param string $operationName Operation name returned by batchRecognize
 *                              (presumably `projects/{p}/locations/{l}/operations/{id}`)
 * @param int    $maxAttempts   Maximum number of polls
 * @return array|null The decoded operation document once `done` is true, or
 *                    null when no token is available, the operation reports an
 *                    error, or the attempts are exhausted
 */
private function waitForV2Operation(string $operationName, int $maxAttempts = 60): ?array
{
    // An operation created in a regional location has to be polled on that
    // region's endpoint; only `global` uses the plain host.
    $host = 'speech.googleapis.com';
    if (
        preg_match('#(?:^|/)locations/([^/]+)/#', $operationName, $locationMatch) === 1
        && $locationMatch[1] !== 'global'
    ) {
        $host = $locationMatch[1].'-speech.googleapis.com';
    }
    $url = "https://{$host}/v2/{$operationName}";

    for ($attempt = 1; $attempt <= $maxAttempts; $attempt++) {
        // Re-read the token on every attempt: getAccessToken() caches it, and a
        // cached token may have only about a minute left while this loop can
        // run for several minutes.
        $token = $this->getAccessToken();
        if (! $token) {
            return null;
        }

        sleep(5);

        try {
            $response = Http::withToken($token)->get($url);
        } catch (\Exception $e) {
            // A transient network failure should not abort the whole wait.
            Log::warning('Google Cloud: STT V2 폴링 예외', ['error' => $e->getMessage(), 'attempt' => $attempt]);

            continue;
        }

        if (! $response->successful()) {
            Log::warning('Google Cloud: STT V2 폴링 요청 실패', [
                'status' => $response->status(),
                'attempt' => $attempt,
            ]);

            continue;
        }

        $result = $response->json();

        if (empty($result['done'])) {
            continue;
        }

        if (isset($result['error'])) {
            Log::error('Google Cloud: STT V2 작업 실패', ['error' => $result['error']]);

            return null;
        }

        Log::info('Google Cloud: STT V2 작업 완료');

        return $result;
    }

    Log::error('Google Cloud: STT V2 작업 타임아웃');

    return null;
}
|
|
|
|
/**
 * Convert a finished V2 batchRecognize operation into per-speaker segments.
 *
 * Expected shape: response.results[uri].transcript.results[].alternatives[].words[].
 * The entry keyed by $gcsUri is used, falling back to the first entry. Words
 * are grouped into a new segment whenever the speaker changes. If any word
 * contains "▁" (U+2581) or "_", all words are treated as SentencePiece-style
 * tokens and joined with joinSentencePieceTokens(); otherwise words are joined
 * with single spaces. When no word-level data is present, all transcripts are
 * merged into a single segment attributed to speaker 1.
 *
 * The first segment always starts at 0 ms; later segments start at the start
 * offset of their first word. Each segment ends at the end offset of its own
 * last kept word.
 *
 * NOTE(review): only the words of the LAST result are read, on the assumption
 * that it carries the words of the whole audio — confirm this holds for V2
 * batchRecognize responses.
 *
 * @param array  $operationResult Decoded operation document
 * @param string $gcsUri          URI used as the key into `response.results`
 * @return array|null ['segments' => [...], 'full_transcript' => string,
 *                    'speaker_count' => int], or null when no results exist
 */
private function parseV2Result(array $operationResult, string $gcsUri): ?array
{
    $batchResults = $operationResult['response']['results'] ?? [];

    // Look the result up by URI; fall back to the first entry on a key mismatch.
    $transcriptData = $batchResults[$gcsUri] ?? null;
    if (! $transcriptData && ! empty($batchResults)) {
        $transcriptData = reset($batchResults);
    }

    if (! $transcriptData) {
        Log::warning('Google Cloud: STT V2 결과 없음');

        return null;
    }

    $results = $transcriptData['transcript']['results'] ?? [];

    if (empty($results)) {
        Log::warning('Google Cloud: STT V2 transcript 결과 없음');

        return null;
    }

    $lastResult = end($results);
    $words = $lastResult['alternatives'][0]['words'] ?? [];

    if (empty($words)) {
        // No word-level data: use the plain transcripts.
        $transcript = '';
        foreach ($results as $res) {
            $transcript .= ($res['alternatives'][0]['transcript'] ?? '') . ' ';
        }
        $transcript = trim($transcript);

        return [
            'segments' => [[
                'speaker_name' => '화자 1',
                'speaker_label' => '1',
                'text' => $transcript,
                'start_time_ms' => 0,
                'end_time_ms' => null,
                'is_manual_speaker' => false,
            ]],
            'full_transcript' => '[화자 1] ' . $transcript,
            'speaker_count' => 1,
        ];
    }

    // Detect SentencePiece-style tokens (any word containing "▁" or "_").
    $hasSentencePiece = false;
    foreach ($words as $w) {
        if (preg_match('/[\x{2581}_]/u', $w['word'] ?? '')) {
            $hasSentencePiece = true;
            break;
        }
    }

    $buildSegment = fn (string $speaker, array $tokens, int $fromMs, int $toMs): array => [
        'speaker_name' => '화자 ' . $speaker,
        'speaker_label' => $speaker,
        'text' => $hasSentencePiece
            ? $this->joinSentencePieceTokens($tokens)
            : implode(' ', array_column($tokens, 'text')),
        'start_time_ms' => $fromMs,
        'end_time_ms' => $toMs,
        'is_manual_speaker' => false,
    ];

    $segments = [];
    $currentSpeaker = null;
    $currentTokens = [];
    $segmentStartMs = 0;
    $segmentEndMs = 0;

    foreach ($words as $word) {
        // V2 words carry the speaker in `speakerLabel` (a string); the V1 name
        // `speakerTag` is kept as a fallback. Normalised to string so the
        // change detection below compares like with like.
        $speaker = (string) ($word['speakerLabel'] ?? $word['speakerTag'] ?? 0);
        $wordText = $word['word'] ?? '';
        $startMs = $this->parseGoogleTimeToMs($word['startOffset'] ?? $word['startTime'] ?? '0s');
        $endMs = $this->parseGoogleTimeToMs($word['endOffset'] ?? $word['endTime'] ?? '0s');

        if ($hasSentencePiece) {
            // A leading marker starts a new word; markers are stripped and
            // tokens that become empty are skipped.
            $isNewWord = preg_match('/^[\x{2581}_]/u', $wordText) === 1;
            $piece = (string) preg_replace('/[\x{2581}_]/u', '', $wordText);
            if (trim($piece) === '') {
                continue;
            }
        } else {
            // Plain word mode: every entry is a whole word.
            $isNewWord = true;
            $piece = trim($wordText);
        }

        // Speaker changed: close the running segment at the end offset of its
        // own last word, not at the start of the next speaker's first word.
        if ($currentSpeaker !== null && $speaker !== $currentSpeaker && ! empty($currentTokens)) {
            $segments[] = $buildSegment($currentSpeaker, $currentTokens, $segmentStartMs, $segmentEndMs);
            $currentTokens = [];
            $segmentStartMs = $startMs;
        }

        $currentSpeaker = $speaker;
        $currentTokens[] = ['text' => $piece, 'new_word' => $isNewWord];
        // Guard against a missing end offset (parsed as 0) moving the end backwards.
        $segmentEndMs = max($endMs, $startMs);
    }

    // Flush the final segment.
    if (! empty($currentTokens)) {
        $segments[] = $buildSegment($currentSpeaker, $currentTokens, $segmentStartMs, $segmentEndMs);
    }

    $fullTranscript = '';
    foreach ($segments as $seg) {
        $fullTranscript .= "[{$seg['speaker_name']}] {$seg['text']}\n";
    }

    $speakerCount = count(array_unique(array_column($segments, 'speaker_label')));

    Log::info('Google Cloud: STT V2 파싱 완료', [
        'segments' => count($segments),
        'speakers' => $speakerCount,
        'sentencePiece' => $hasSentencePiece,
    ]);

    return [
        'segments' => $segments,
        'full_transcript' => trim($fullTranscript),
        'speaker_count' => $speakerCount,
    ];
}
|
|
|
|
/**
 * Transcribe with V2 (Chirp 2) first and fall back to V1 (latest_long).
 *
 * V2 is attempted only when the service account exposes a `project_id`. Its
 * result is accepted when it is non-empty and contains at least one segment;
 * otherwise the V1 diarization path runs. The phrase hints are forwarded to
 * the V2 call only.
 *
 * @param string $gcsUri       Audio location
 * @param string $languageCode Language code
 * @param int    $minSpeakers  Minimum speaker count for diarization
 * @param int    $maxSpeakers  Maximum speaker count for diarization
 * @param array  $phraseHints  Phrase hints forwarded to speechToTextV2()
 * @return array|null ['segments' => [...], 'full_transcript' => '...', 'speaker_count' => int, 'engine' => 'v2'|'v1'],
 *                    or the V1 failure value when both engines fail
 */
public function speechToTextWithDiarizationAuto(
    string $gcsUri,
    string $languageCode = 'ko-KR',
    int $minSpeakers = 2,
    int $maxSpeakers = 6,
    array $phraseHints = []
): ?array {
    $canUseV2 = (bool) ($this->serviceAccount['project_id'] ?? null);

    if ($canUseV2) {
        // First choice: V2 + Chirp 2.
        Log::info('Google Cloud: STT V2 (Chirp 2) 시도');

        $viaV2 = $this->speechToTextV2($gcsUri, $languageCode, $minSpeakers, $maxSpeakers, $phraseHints);
        $v2Usable = $viaV2 && ! empty($viaV2['segments']);

        if ($v2Usable) {
            $viaV2['engine'] = 'v2';

            return $viaV2;
        }

        Log::warning('Google Cloud: STT V2 실패, V1 폴백');
    }

    // Fallback: V1 + latest_long.
    Log::info('Google Cloud: STT V1 (latest_long) 폴백 실행');

    $viaV1 = $this->speechToTextWithDiarization($gcsUri, $languageCode, $minSpeakers, $maxSpeakers);

    if (! $viaV1) {
        return $viaV1;
    }

    $viaV1['engine'] = 'v1';

    return $viaV1;
}
|
|
|
|
/**
 * Delete an object from the configured Cloud Storage bucket.
 *
 * @param string $objectName Name of the object inside the bucket
 * @return bool True when the API answers with a successful status; false when
 *              no token/bucket is available, the request is rejected, or an
 *              exception is thrown
 */
public function deleteFromStorage(string $objectName): bool
{
    $token = $this->getAccessToken();
    if (! $token) {
        return false;
    }

    $bucket = config('services.google.storage_bucket');
    if (! $bucket) {
        return false;
    }

    try {
        // The object name is a URL path segment: rawurlencode() encodes a
        // space as %20 and "/" as %2F. urlencode() would turn a space into
        // "+", which is a literal plus sign inside a path.
        $deleteUrl = 'https://storage.googleapis.com/storage/v1/b/'.
            rawurlencode($bucket).'/o/'.rawurlencode($objectName);

        $response = Http::withToken($token)->delete($deleteUrl);

        return $response->successful();
    } catch (\Exception $e) {
        Log::error('Google Cloud: Storage 삭제 예외', ['error' => $e->getMessage()]);

        return false;
    }
}
|
|
|
|
/**
 * Download an object from the configured Cloud Storage bucket.
 *
 * The object is fetched with `alt=media` and its whole body is returned as a
 * string, so the complete object is held in memory (it is not streamed).
 *
 * @param string $objectName Name of the object inside the bucket
 * @return string|null The object's bytes, or null when no token/bucket is
 *                     available, the request is rejected, or an exception is thrown
 */
public function downloadFromStorage(string $objectName): ?string
{
    $token = $this->getAccessToken();
    if (! $token) {
        Log::error('Google Cloud: 다운로드 토큰 획득 실패');

        return null;
    }

    $bucket = config('services.google.storage_bucket');
    if (! $bucket) {
        Log::error('Google Cloud: Storage 버킷 설정 없음');

        return null;
    }

    try {
        // The object name is a URL path segment: rawurlencode() encodes a
        // space as %20 and "/" as %2F. urlencode() would turn a space into
        // "+", which is a literal plus sign inside a path.
        $url = 'https://storage.googleapis.com/storage/v1/b/'.
            rawurlencode($bucket).'/o/'.rawurlencode($objectName).'?alt=media';

        $response = Http::withToken($token)->get($url);

        if ($response->successful()) {
            $body = $response->body();

            Log::info('Google Cloud: Storage 다운로드 성공', [
                'object' => $objectName,
                'size' => strlen($body),
            ]);

            return $body;
        }

        Log::error('Google Cloud: Storage 다운로드 실패', [
            'status' => $response->status(),
            'response' => $response->body(),
        ]);

        return null;
    } catch (\Exception $e) {
        Log::error('Google Cloud: Storage 다운로드 예외', ['error' => $e->getMessage()]);

        return null;
    }
}
|
|
|
|
/**
 * Report whether the service can be used.
 *
 * True only when a service account has been loaded and an access token can be
 * obtained for it; the token lookup is skipped when no account is loaded.
 *
 * @return bool
 */
public function isAvailable(): bool
{
    if ($this->serviceAccount === null) {
        return false;
    }

    return $this->getAccessToken() !== null;
}
|
|
}
|