diff --git a/plane-src/apps/web/core/components/voice-tasker/global-control.tsx b/plane-src/apps/web/core/components/voice-tasker/global-control.tsx index c1a83bf..c080bd3 100644 --- a/plane-src/apps/web/core/components/voice-tasker/global-control.tsx +++ b/plane-src/apps/web/core/components/voice-tasker/global-control.tsx @@ -87,6 +87,13 @@ const VOICE_TASK_TIME_HOURS = Array.from({ length: 24 }, (_, index) => index.toS const VOICE_TASK_TIME_MINUTES = Array.from({ length: 60 }, (_, index) => index.toString().padStart(2, "0")); const VOICE_TASK_TIME_WHEEL_ITEM_HEIGHT = 36; const VOICE_TASK_WAVEFORM_BAR_COUNT = 32; +const VOICE_TASK_SPEECH_MIN_HZ = 85; +const VOICE_TASK_SPEECH_MAX_HZ = 3800; + +function clampVoiceTaskLevel(value: number) { + if (!Number.isFinite(value)) return 0; + return Math.max(0, Math.min(1, value)); +} function getSupportedMimeType() { if (typeof MediaRecorder === "undefined") return ""; @@ -388,14 +395,7 @@ function VoiceTaskAudioPlayer({ audioUrl }: { audioUrl: string }) { } function VoiceTaskWaveform({ isRecording, levels }: { isRecording: boolean; levels: number[] }) { - const fallbackLevels = useMemo( - () => - Array.from({ length: VOICE_TASK_WAVEFORM_BAR_COUNT }, (_, index) => { - const wave = Math.sin(index * 0.75) * 0.5 + 0.5; - return 0.18 + wave * 0.42; - }), - [] - ); + const fallbackLevels = useMemo(() => Array.from({ length: VOICE_TASK_WAVEFORM_BAR_COUNT }, () => 0), []); const renderedLevels = isRecording && levels.length ? levels : fallbackLevels; return ( @@ -409,12 +409,12 @@ function VoiceTaskWaveform({ isRecording, levels }: { isRecording: boolean; leve @@ -799,10 +799,19 @@ export function VoiceTaskerGlobalControl({ workspaceSlug }: Props) { const audioContext = new AudioContextClass(); const analyser = audioContext.createAnalyser(); const source = audioContext.createMediaStreamSource(stream); - const frequencyData = new Uint8Array(analyser.frequencyBinCount); + const previousLevels = Array.from({ length: VOICE_TASK_WAVEFORM_BAR_COUNT }, () => 0); + const historyLevels = Array.from({ length: Math.ceil(VOICE_TASK_WAVEFORM_BAR_COUNT / 2) }, () => 0); + let smoothedVoiceLevel = 0; + let rollingNoiseFloor = 0.035; + let rollingVoiceCeiling = 0.28; + let lastHistoryUpdate = 0; analyser.fftSize = 1024; - analyser.smoothingTimeConstant = 0.72; + analyser.minDecibels = -90; + analyser.maxDecibels = -12; + analyser.smoothingTimeConstant = 0.38; + const frequencyData = new Uint8Array(analyser.frequencyBinCount); + const timeDomainData = new Uint8Array(analyser.fftSize); source.connect(analyser); audioContextRef.current = audioContext; audioSourceRef.current = source; @@ -810,13 +819,100 @@ export function VoiceTaskerGlobalControl({ workspaceSlug }: Props) { const tick = () => { analyser.getByteFrequencyData(frequencyData); + analyser.getByteTimeDomainData(timeDomainData); + + let rmsSum = 0; + for (const value of timeDomainData) { + const centeredValue = (value - 128) / 128; + rmsSum += centeredValue * centeredValue; + } + + const rms = Math.sqrt(rmsSum / timeDomainData.length); + const nyquist = audioContext.sampleRate / 2; + const voiceStartBin = Math.max(1, Math.floor((VOICE_TASK_SPEECH_MIN_HZ / nyquist) * frequencyData.length)); + const voiceEndBin = Math.min( + frequencyData.length - 1, + Math.ceil((VOICE_TASK_SPEECH_MAX_HZ / nyquist) * frequencyData.length) + ); + let bandEnergy = 0; + let bandWeight = 0; + + for (let bin = voiceStartBin; bin <= voiceEndBin; bin++) { + const frequency = (bin / frequencyData.length) * nyquist; + const voiceWeight = frequency < 160 ? 0.55 : frequency > 3200 ? 0.65 : frequency < 260 ? 0.82 : 1; + bandEnergy += Math.pow(frequencyData[bin] / 255, 1.12) * voiceWeight; + bandWeight += voiceWeight; + } + + const frequencyEnergy = bandWeight > 0 ? bandEnergy / bandWeight : 0; + const rmsEnergy = clampVoiceTaskLevel((rms - 0.006) / 0.13); + const rawVoiceEnergy = frequencyEnergy * 0.68 + rmsEnergy * 0.32; + const floorSpeed = rawVoiceEnergy > rollingNoiseFloor ? 0.004 : 0.045; + rollingNoiseFloor = clampVoiceTaskLevel(rollingNoiseFloor + (rawVoiceEnergy - rollingNoiseFloor) * floorSpeed); + rollingNoiseFloor = Math.max(0.018, Math.min(0.12, rollingNoiseFloor)); + rollingVoiceCeiling = + rawVoiceEnergy > rollingVoiceCeiling + ? rollingVoiceCeiling + (rawVoiceEnergy - rollingVoiceCeiling) * 0.08 + : rollingVoiceCeiling * 0.996 + 0.001; + rollingVoiceCeiling = Math.max(rollingNoiseFloor + 0.16, Math.min(0.58, rollingVoiceCeiling)); + + const adaptiveRange = Math.max(0.14, rollingVoiceCeiling - rollingNoiseFloor); + const absoluteVoice = clampVoiceTaskLevel((rawVoiceEnergy - 0.026) / 0.26); + const adaptiveVoice = clampVoiceTaskLevel((rawVoiceEnergy - rollingNoiseFloor * 1.12) / adaptiveRange); + const isMutedFrame = rawVoiceEnergy < rollingNoiseFloor + 0.012 && rms < 0.012; + const compressedVoice = isMutedFrame + ? 0 + : (1 - Math.exp(-(absoluteVoice * 0.66 + adaptiveVoice * 0.34) * 1.62)) * 0.92; + const voiceAttack = compressedVoice > smoothedVoiceLevel ? 0.46 : 0.2; + smoothedVoiceLevel += (compressedVoice - smoothedVoiceLevel) * voiceAttack; + + const now = performance.now(); + if (now - lastHistoryUpdate > 34) { + historyLevels.pop(); + historyLevels.unshift(smoothedVoiceLevel); + lastHistoryUpdate = now; + } + + const center = (VOICE_TASK_WAVEFORM_BAR_COUNT - 1) / 2; + const phase = now / 72; const nextLevels = Array.from({ length: VOICE_TASK_WAVEFORM_BAR_COUNT }, (_, index) => { - const start = Math.floor((index / VOICE_TASK_WAVEFORM_BAR_COUNT) * frequencyData.length); - const end = Math.floor(((index + 1) / VOICE_TASK_WAVEFORM_BAR_COUNT) * frequencyData.length); - const slice = frequencyData.slice(start, Math.max(start + 1, end)); - const average = slice.reduce((sum, value) => sum + value, 0) / slice.length; - return Math.max(0.08, Math.min(1, average / 165)); + const distanceFromCenter = Math.abs(index - center); + const historyIndex = Math.min(historyLevels.length - 1, Math.floor(distanceFromCenter)); + const nextHistoryIndex = Math.min(historyLevels.length - 1, historyIndex + 1); + const historyMix = distanceFromCenter - historyIndex; + const historyLevel = + historyLevels[historyIndex] + (historyLevels[nextHistoryIndex] - historyLevels[historyIndex]) * historyMix; + const frequencyOffset = distanceFromCenter / (center + 0.5); + const bandCenter = voiceStartBin + Math.floor((voiceEndBin - voiceStartBin) * frequencyOffset); + const bandRadius = Math.max(1, Math.floor((voiceEndBin - voiceStartBin) / VOICE_TASK_WAVEFORM_BAR_COUNT)); + let localFrequencyEnergy = 0; + let localFrequencyCount = 0; + + for ( + let bin = Math.max(voiceStartBin, bandCenter - bandRadius); + bin <= Math.min(voiceEndBin, bandCenter + bandRadius); + bin++ + ) { + localFrequencyEnergy += frequencyData[bin] / 255; + localFrequencyCount++; + } + + const spectralTexture = + localFrequencyCount > 0 + ? clampVoiceTaskLevel((localFrequencyEnergy / localFrequencyCount - rollingNoiseFloor) / adaptiveRange) + : 0; + const centerWeight = 0.44 + Math.pow(1 - frequencyOffset, 1.18) * 0.56; + const motion = 0.92 + Math.sin(phase + index * 0.78) * 0.045 + Math.sin(phase * 1.47 + index * 1.73) * 0.025; + const targetLevel = clampVoiceTaskLevel( + (historyLevel * 0.84 + spectralTexture * smoothedVoiceLevel * 0.16) * centerWeight * motion + ); + const previousLevel = previousLevels[index] ?? 0; + const smoothing = targetLevel > previousLevel ? 0.52 : 0.24; + const smoothedLevel = previousLevel + (targetLevel - previousLevel) * smoothing; + + previousLevels[index] = smoothedLevel; + return smoothedLevel; }); setAudioLevels(nextLevels);