diff --git a/docs_prod/2_voicetasker/VOICETASKER_TECH.md b/docs_prod/2_voicetasker/VOICETASKER_TECH.md index fe33bde..af491d1 100644 --- a/docs_prod/2_voicetasker/VOICETASKER_TECH.md +++ b/docs_prod/2_voicetasker/VOICETASKER_TECH.md @@ -390,12 +390,47 @@ Backend: Использовать workspace slug, как в существующих API routes Plane: ```http +GET /api/workspaces/:workspaceSlug/voice-task/preflight POST /api/workspaces/:workspaceSlug/voice-task/parse POST /api/workspaces/:workspaceSlug/voice-task/commit POST /api/workspaces/:workspaceSlug/voice-task/resolve-command ``` -### 7.1. Parse +### 7.1. Preflight + +```http +GET /api/workspaces/:workspaceSlug/voice-task/preflight +``` + +Назначение: + +- проверить, доступен ли Voice Tasker текущему пользователю; +- не раскрывать OpenAI key; +- вернуть max audio duration и допустимые mime types; +- дать frontend причину недоступности для disabled tooltip. + +Response: + +```json +{ + "available": true, + "reason": null, + "max_audio_duration_seconds": 120, + "accepted_mime_types": ["audio/webm", "audio/mp4", "audio/mpeg", "audio/wav"], + "access_mode": "all_workspace_members" +} +``` + +`reason` если недоступно: + +```txt +not_configured +disabled +missing_api_key +role_denied +``` + +### 7.2. 
Parse ```http POST /api/workspaces/:workspaceSlug/voice-task/parse diff --git a/plane-src/apps/api/plane/app/urls/voice_tasker.py b/plane-src/apps/api/plane/app/urls/voice_tasker.py index 24f9555..f62dd25 100644 --- a/plane-src/apps/api/plane/app/urls/voice_tasker.py +++ b/plane-src/apps/api/plane/app/urls/voice_tasker.py @@ -5,6 +5,8 @@ from django.urls import path from plane.app.views import ( + VoiceTaskParseEndpoint, + VoiceTaskPreflightEndpoint, WorkspaceAISettingsEndpoint, WorkspaceAISettingsTestConnectionEndpoint, ) @@ -21,4 +23,14 @@ urlpatterns = [ WorkspaceAISettingsTestConnectionEndpoint.as_view(), name="voice-tasker-settings-test-connection", ), + path( + "workspaces//voice-task/preflight/", + VoiceTaskPreflightEndpoint.as_view(), + name="voice-task-preflight", + ), + path( + "workspaces//voice-task/parse/", + VoiceTaskParseEndpoint.as_view(), + name="voice-task-parse", + ), ] diff --git a/plane-src/apps/api/plane/app/views/__init__.py b/plane-src/apps/api/plane/app/views/__init__.py index 2c963da..9b20662 100644 --- a/plane-src/apps/api/plane/app/views/__init__.py +++ b/plane-src/apps/api/plane/app/views/__init__.py @@ -244,6 +244,8 @@ from .webhook.base import ( ) from .voice_tasker import ( + VoiceTaskParseEndpoint, + VoiceTaskPreflightEndpoint, WorkspaceAISettingsEndpoint, WorkspaceAISettingsTestConnectionEndpoint, ) diff --git a/plane-src/apps/api/plane/app/views/voice_tasker.py b/plane-src/apps/api/plane/app/views/voice_tasker.py index 8844bce..7afc0e6 100644 --- a/plane-src/apps/api/plane/app/views/voice_tasker.py +++ b/plane-src/apps/api/plane/app/views/voice_tasker.py @@ -2,19 +2,65 @@ # SPDX-License-Identifier: AGPL-3.0-only # See the LICENSE file for details. 
VOICE_TASK_ACCEPTED_AUDIO_TYPES = ["audio/webm", "audio/mp4", "audio/mpeg", "audio/wav"]


def get_voice_task_preflight(workspace, user):
    """Describe whether Voice Tasker can be used by ``user`` in ``workspace``.

    The checks run in a fixed order and the first failing one decides the
    ``reason`` that is reported:

    1. ``not_configured``  - the workspace has no ``WorkspaceAISettings`` row.
    2. ``disabled``        - settings exist but ``voice_tasker_enabled`` is falsy.
    3. ``missing_api_key`` - no active credential with an encrypted key exists
       for the configured provider.
    4. ``role_denied``     - access mode is admins-only and the user has no
       active admin membership in the workspace.

    Args:
        workspace: Workspace the request is scoped to.
        user: User whose access is being evaluated.

    Returns:
        A dict with the keys ``available`` (bool), ``reason`` (one of the
        strings above, or ``None`` when available),
        ``max_audio_duration_seconds``, ``accepted_mime_types`` and
        ``access_mode``. The API key itself is never included.
    """
    ai_settings = WorkspaceAISettings.objects.filter(workspace=workspace).first()

    response = {
        "available": False,
        "reason": "not_configured",
        # Fallback values reported while the workspace has no settings row.
        "max_audio_duration_seconds": 120,
        # Hand out a copy so a caller mutating the payload cannot alter the
        # module-level allow-list used for upload validation.
        "accepted_mime_types": list(VOICE_TASK_ACCEPTED_AUDIO_TYPES),
        "access_mode": "all_workspace_members",
    }

    if not ai_settings:
        return response

    response["max_audio_duration_seconds"] = ai_settings.max_audio_duration_seconds
    response["access_mode"] = ai_settings.access_mode

    if not ai_settings.voice_tasker_enabled:
        response["reason"] = "disabled"
        return response

    credential = WorkspaceAICredential.objects.filter(
        workspace=workspace,
        provider=ai_settings.provider,
        is_active=True,
    ).first()
    if not credential or not credential.encrypted_api_key:
        response["reason"] = "missing_api_key"
        return response

    if ai_settings.access_mode == WorkspaceAISettings.AccessMode.ADMINS_ONLY:
        # The membership row is only needed for the admins-only check, so it is
        # fetched here instead of on every call.
        workspace_member = WorkspaceMember.objects.filter(
            workspace=workspace,
            member=user,
            is_active=True,
        ).first()
        if not workspace_member or workspace_member.role != ROLE.ADMIN.value:
            response["reason"] = "role_denied"
            return response

    response["available"] = True
    response["reason"] = None
    return response
def _parse_duration_seconds(raw_value):
    """Convert a client-supplied duration to a float, mapping bad input to 0.0.

    ``float()`` accepts the string ``"nan"``; NaN compares false against every
    bound, so it would otherwise pass both the ``<= 0`` and the ``> max``
    checks. It is therefore treated like any other unparsable value.
    Infinity is left untouched so it is still rejected as too long.
    """
    import math  # local import keeps this helper self-contained

    try:
        seconds = float(raw_value)
    except (TypeError, ValueError):
        return 0.0
    return 0.0 if math.isnan(seconds) else seconds


def _parse_client_context(raw_value):
    """Decode the optional ``client_context`` form field into a dict.

    Missing, malformed, excessively nested or non-object JSON all yield an
    empty dict, so the response always carries a JSON object.
    """
    try:
        parsed = json.loads(raw_value or "{}")
    except (TypeError, ValueError, RecursionError):
        # ValueError covers json.JSONDecodeError and undecodable bytes.
        return {}
    return parsed if isinstance(parsed, dict) else {}


class VoiceTaskPreflightEndpoint(BaseAPIView):
    """Read-only endpoint exposing the Voice Tasker availability payload."""

    @allow_permission(allowed_roles=[ROLE.ADMIN, ROLE.MEMBER, ROLE.GUEST], level="WORKSPACE")
    def get(self, request, slug):
        """Return the preflight dict for the requesting user with HTTP 200."""
        workspace = Workspace.objects.get(slug=slug)
        return Response(get_voice_task_preflight(workspace, request.user), status=status.HTTP_200_OK)


class VoiceTaskParseEndpoint(BaseAPIView):
    """Accept a recorded audio clip for the (not yet connected) parse pipeline.

    The request is multipart/form data with an ``audio`` file, a
    ``duration_seconds`` field and an optional JSON ``client_context`` field.
    For now the endpoint only validates the upload and echoes its metadata.
    """

    parser_classes = (MultiPartParser, FormParser)

    @staticmethod
    def _error(code, message, http_status=status.HTTP_400_BAD_REQUEST):
        """Build the error envelope shared by every rejection path."""
        return Response({"ok": False, "code": code, "error": message}, status=http_status)

    @allow_permission(allowed_roles=[ROLE.ADMIN, ROLE.MEMBER, ROLE.GUEST], level="WORKSPACE")
    def post(self, request, slug):
        """Validate the uploaded audio and acknowledge it with HTTP 202.

        Returns 403 when the preflight reason is ``role_denied``, 400 for any
        other unavailability or validation failure.
        """
        workspace = Workspace.objects.get(slug=slug)
        preflight = get_voice_task_preflight(workspace, request.user)

        if not preflight["available"]:
            response_status = (
                status.HTTP_403_FORBIDDEN
                if preflight["reason"] == "role_denied"
                else status.HTTP_400_BAD_REQUEST
            )
            return self._error(
                preflight["reason"],
                "Voice Tasker is not available for this workspace.",
                response_status,
            )

        audio = request.FILES.get("audio")
        if not audio:
            return self._error("missing_audio", "Audio file is required.")

        # NOTE(review): content_type comes from the upload itself, i.e. it is
        # client-declared; the file bytes are not inspected here.
        if audio.content_type not in VOICE_TASK_ACCEPTED_AUDIO_TYPES:
            return self._error("unsupported_audio_type", "Unsupported audio file type.")

        # NOTE(review): the duration is reported by the client and is not
        # checked against the actual audio in this endpoint.
        duration_seconds = _parse_duration_seconds(request.data.get("duration_seconds", 0))

        if duration_seconds <= 0:
            return self._error("invalid_duration", "Audio duration is required.")

        if duration_seconds > preflight["max_audio_duration_seconds"]:
            return self._error("audio_too_long", "Audio duration exceeds workspace limit.")

        client_context = _parse_client_context(request.data.get("client_context"))

        return Response(
            {
                "ok": True,
                "status": "uploaded",
                "pipeline_status": "pending_openai_pipeline",
                "audio": {
                    "content_type": audio.content_type,
                    "duration_seconds": duration_seconds,
                    "size": audio.size,
                },
                "client_context": client_context,
            },
            status=status.HTTP_202_ACCEPTED,
        )
observer(function WorkspaceContentWrapper > {children} + {workspaceSlug && } ); diff --git a/plane-src/apps/web/core/components/voice-tasker/global-control.tsx b/plane-src/apps/web/core/components/voice-tasker/global-control.tsx new file mode 100644 index 0000000..5a1f76a --- /dev/null +++ b/plane-src/apps/web/core/components/voice-tasker/global-control.tsx @@ -0,0 +1,320 @@ +/** + * Copyright (c) 2023-present Plane Software, Inc. and contributors + * SPDX-License-Identifier: AGPL-3.0-only + * See the LICENSE file for details. + */ + +import { useCallback, useEffect, useMemo, useRef, useState } from "react"; +import useSWR from "swr"; +import { Mic, RotateCcw, Square, Upload, X } from "lucide-react"; +// plane imports +import { Button } from "@plane/propel/button"; +import { Tooltip } from "@plane/propel/tooltip"; +import { TOAST_TYPE, setToast } from "@plane/propel/toast"; +import { EModalPosition, EModalWidth, ModalCore } from "@plane/ui"; +import { cn } from "@plane/utils"; +// services +import { WorkspaceAIService } from "@/services/workspace-ai.service"; + +const workspaceAIService = new WorkspaceAIService(); + +type TVoiceTaskerStatus = "idle" | "recording" | "uploading" | "success" | "error"; + +const UNAVAILABLE_LABELS = { + disabled: "AI-функции не активированы для этого workspace", + missing_api_key: "OpenAI key не сохранен для этого workspace", + not_configured: "AI-функции не настроены для этого workspace", + role_denied: "Voice Task недоступен для вашей роли", +} as const; + +function getSupportedMimeType() { + if (typeof MediaRecorder === "undefined") return ""; + + const candidates = ["audio/webm;codecs=opus", "audio/webm", "audio/mp4"]; + return candidates.find((candidate) => MediaRecorder.isTypeSupported(candidate)) ?? 
""; +} + +function formatDuration(seconds: number) { + const roundedSeconds = Math.max(0, Math.floor(seconds)); + const minutes = Math.floor(roundedSeconds / 60); + const remainingSeconds = roundedSeconds % 60; + return `${minutes}:${remainingSeconds.toString().padStart(2, "0")}`; +} + +type Props = { + workspaceSlug: string; +}; + +export function VoiceTaskerGlobalControl({ workspaceSlug }: Props) { + const [isOpen, setIsOpen] = useState(false); + const [status, setStatus] = useState("idle"); + const [duration, setDuration] = useState(0); + const [audioBlob, setAudioBlob] = useState(null); + const [audioUrl, setAudioUrl] = useState(null); + const [error, setError] = useState(null); + + const mediaRecorderRef = useRef(null); + const streamRef = useRef(null); + const chunksRef = useRef([]); + const timerRef = useRef(null); + const startedAtRef = useRef(0); + + const { data: preflight } = useSWR( + workspaceSlug ? `VOICE_TASK_PREFLIGHT_${workspaceSlug}` : null, + workspaceSlug ? () => workspaceAIService.retrieveVoiceTaskPreflight(workspaceSlug) : null, + { refreshInterval: 30000 } + ); + + const maxDuration = preflight?.max_audio_duration_seconds ?? 120; + const isAvailable = !!preflight?.available; + const isRecording = status === "recording"; + const isUploading = status === "uploading"; + + const tooltipContent = useMemo(() => { + if (!preflight) return "Voice Task"; + if (preflight.available) return "Voice Task"; + return UNAVAILABLE_LABELS[preflight.reason ?? 
"not_configured"]; + }, [preflight]); + + const clearTimer = useCallback(() => { + if (timerRef.current) { + window.clearInterval(timerRef.current); + timerRef.current = null; + } + }, []); + + const stopStream = useCallback(() => { + streamRef.current?.getTracks().forEach((track) => track.stop()); + streamRef.current = null; + }, []); + + const stopRecording = useCallback(() => { + const recorder = mediaRecorderRef.current; + clearTimer(); + + if (recorder && recorder.state === "recording") { + recorder.stop(); + return; + } + + stopStream(); + }, [clearTimer, stopStream]); + + const resetRecording = useCallback(() => { + stopRecording(); + setAudioBlob(null); + setAudioUrl(null); + setDuration(0); + setError(null); + setStatus("idle"); + }, [stopRecording]); + + const handleClose = useCallback(() => { + resetRecording(); + setIsOpen(false); + }, [resetRecording]); + + useEffect( + () => () => { + clearTimer(); + stopStream(); + }, + [clearTimer, stopStream] + ); + + useEffect(() => { + if (!audioBlob) { + setAudioUrl(null); + return; + } + + const objectUrl = URL.createObjectURL(audioBlob); + setAudioUrl(objectUrl); + + return () => URL.revokeObjectURL(objectUrl); + }, [audioBlob]); + + const startRecording = async () => { + if (typeof navigator === "undefined" || !navigator.mediaDevices?.getUserMedia || typeof MediaRecorder === "undefined") { + setError("Браузер не поддерживает запись аудио."); + setStatus("error"); + return; + } + + try { + resetRecording(); + const stream = await navigator.mediaDevices.getUserMedia({ + audio: { + echoCancellation: true, + noiseSuppression: true, + }, + }); + const mimeType = getSupportedMimeType(); + const recorder = new MediaRecorder(stream, mimeType ? 
{ mimeType } : undefined); + + chunksRef.current = []; + streamRef.current = stream; + mediaRecorderRef.current = recorder; + + recorder.ondataavailable = (event) => { + if (event.data.size > 0) chunksRef.current.push(event.data); + }; + recorder.onstop = () => { + const type = recorder.mimeType || mimeType || "audio/webm"; + setAudioBlob(new Blob(chunksRef.current, { type })); + setStatus("idle"); + stopStream(); + }; + + recorder.start(); + startedAtRef.current = Date.now(); + setDuration(0); + setError(null); + setStatus("recording"); + + timerRef.current = window.setInterval(() => { + const elapsed = (Date.now() - startedAtRef.current) / 1000; + setDuration(elapsed); + if (elapsed >= maxDuration) stopRecording(); + }, 250); + } catch { + setError("Не удалось получить доступ к микрофону."); + setStatus("error"); + stopStream(); + clearTimer(); + } + }; + + const uploadAudio = async () => { + if (!audioBlob) return; + + setStatus("uploading"); + setError(null); + + const audioType = audioBlob.type || "audio/webm"; + const extension = audioType.includes("mp4") ? "m4a" : "webm"; + const formData = new FormData(); + formData.append("audio", audioBlob, `voice-task.${extension}`); + formData.append("duration_seconds", String(Math.max(1, Math.ceil(duration)))); + formData.append( + "client_context", + JSON.stringify({ + current_page: window.location.pathname, + locale: navigator.language, + timezone: Intl.DateTimeFormat().resolvedOptions().timeZone, + }) + ); + + try { + await workspaceAIService.uploadVoiceTaskAudio(workspaceSlug, formData); + setStatus("success"); + setToast({ + type: TOAST_TYPE.SUCCESS, + title: "Аудио отправлено", + message: "Backend принял запись. Распознавание будет подключено следующим этапом.", + }); + } catch (err) { + const message = typeof err === "object" && err && "error" in err ? 
String(err.error) : "Не удалось отправить аудио."; + setError(message); + setStatus("error"); + setToast({ + type: TOAST_TYPE.ERROR, + title: "Voice Task не отправлен", + message, + }); + } + }; + + return ( + <> +
+ + + +
+ + +
+
+
+

Voice Task

+

Запись до {maxDuration} секунд

+
+ +
+ +
+
+
+
{formatDuration(duration)}
+
+ {status === "success" ? "Audio uploaded" : isRecording ? "Recording" : "Ready"} +
+
+
+ +
+
+ + {audioUrl && !isRecording && ( + + )} + + {error && ( +
+ {error} +
+ )} +
+ +
+ {audioBlob && !isRecording && ( + + )} + + +
+
+
+ + ); +} diff --git a/plane-src/apps/web/core/services/workspace-ai.service.ts b/plane-src/apps/web/core/services/workspace-ai.service.ts index c47ddf0..de78767 100644 --- a/plane-src/apps/web/core/services/workspace-ai.service.ts +++ b/plane-src/apps/web/core/services/workspace-ai.service.ts @@ -6,6 +6,8 @@ import { API_BASE_URL } from "@plane/constants"; import type { + TVoiceTaskPreflight, + TVoiceTaskUploadResult, TWorkspaceAIConnectionTestResult, TWorkspaceAISettings, TWorkspaceAISettingsPayload, @@ -43,4 +45,20 @@ export class WorkspaceAIService extends APIService { throw error?.response?.data; }); } + + async retrieveVoiceTaskPreflight(workspaceSlug: string): Promise { + return this.get(`/api/workspaces/${workspaceSlug}/voice-task/preflight/`) + .then((response) => response?.data) + .catch((error) => { + throw error?.response?.data; + }); + } + + async uploadVoiceTaskAudio(workspaceSlug: string, data: FormData): Promise { + return this.post(`/api/workspaces/${workspaceSlug}/voice-task/parse/`, data) + .then((response) => response?.data) + .catch((error) => { + throw error?.response?.data; + }); + } } diff --git a/plane-src/packages/types/src/ai.ts b/plane-src/packages/types/src/ai.ts index b6b0128..ab499fe 100644 --- a/plane-src/packages/types/src/ai.ts +++ b/plane-src/packages/types/src/ai.ts @@ -65,3 +65,27 @@ export type TWorkspaceAIConnectionTestResult = { code?: string; error?: string; }; + +export type TVoiceTaskPreflightReason = "not_configured" | "disabled" | "missing_api_key" | "role_denied" | null; + +export type TVoiceTaskPreflight = { + available: boolean; + reason: TVoiceTaskPreflightReason; + max_audio_duration_seconds: number; + accepted_mime_types: string[]; + access_mode: TWorkspaceAIAccessMode; +}; + +export type TVoiceTaskUploadResult = { + ok: boolean; + status?: "uploaded"; + pipeline_status?: "pending_openai_pipeline"; + audio?: { + content_type: string; + duration_seconds: number; + size: number; + }; + client_context?: Record; + 
code?: string; + error?: string; +};