ФУНКЦИИ - МЕЖПРОЕКТНАЯ КОММУНИКАЦИЯ: preflight и запись audio для Voice Tasker

This commit is contained in:
DCCONSTRUCTIONS 2026-04-24 17:21:34 +03:00
parent 237c7964cd
commit 3c19c3175f
9 changed files with 542 additions and 3 deletions

View File

@ -390,12 +390,47 @@ Backend:
Использовать workspace slug, как в существующих API routes Plane:
```http
GET /api/workspaces/:workspaceSlug/voice-task/preflight
POST /api/workspaces/:workspaceSlug/voice-task/parse
POST /api/workspaces/:workspaceSlug/voice-task/commit
POST /api/workspaces/:workspaceSlug/voice-task/resolve-command
```
### 7.1. Parse
### 7.1. Preflight
```http
GET /api/workspaces/:workspaceSlug/voice-task/preflight
```
Назначение:
- проверить, доступен ли Voice Tasker текущему пользователю;
- не раскрывать OpenAI key;
- вернуть max audio duration и допустимые mime types;
- дать frontend причину недоступности для disabled tooltip.
Response:
```json
{
"available": true,
"reason": null,
"max_audio_duration_seconds": 120,
"accepted_mime_types": ["audio/webm", "audio/mp4", "audio/mpeg", "audio/wav"],
"access_mode": "all_workspace_members"
}
```
Возможные значения `reason`, когда Voice Tasker недоступен (`available: false`; при `available: true` поле равно `null`):
```txt
not_configured
disabled
missing_api_key
role_denied
```
### 7.2. Parse
```http
POST /api/workspaces/:workspaceSlug/voice-task/parse

View File

@ -5,6 +5,8 @@
from django.urls import path
from plane.app.views import (
VoiceTaskParseEndpoint,
VoiceTaskPreflightEndpoint,
WorkspaceAISettingsEndpoint,
WorkspaceAISettingsTestConnectionEndpoint,
)
@ -21,4 +23,14 @@ urlpatterns = [
WorkspaceAISettingsTestConnectionEndpoint.as_view(),
name="voice-tasker-settings-test-connection",
),
path(
"workspaces/<str:slug>/voice-task/preflight/",
VoiceTaskPreflightEndpoint.as_view(),
name="voice-task-preflight",
),
path(
"workspaces/<str:slug>/voice-task/parse/",
VoiceTaskParseEndpoint.as_view(),
name="voice-task-parse",
),
]

View File

@ -244,6 +244,8 @@ from .webhook.base import (
)
from .voice_tasker import (
VoiceTaskParseEndpoint,
VoiceTaskPreflightEndpoint,
WorkspaceAISettingsEndpoint,
WorkspaceAISettingsTestConnectionEndpoint,
)

View File

@ -2,19 +2,65 @@
# SPDX-License-Identifier: AGPL-3.0-only
# See the LICENSE file for details.
import json
from openai import OpenAI
from rest_framework import status
from rest_framework.parsers import FormParser, MultiPartParser
from rest_framework.response import Response
from plane.app.permissions import ROLE, allow_permission
from plane.app.serializers import WorkspaceAISettingsSerializer
from plane.db.models import Workspace, WorkspaceAICredential, WorkspaceAISettings
from plane.db.models import Workspace, WorkspaceAICredential, WorkspaceAISettings, WorkspaceMember
from plane.license.utils.encryption import decrypt_data
from plane.utils.exception_logger import log_exception
from .base import BaseAPIView
VOICE_TASK_ACCEPTED_AUDIO_TYPES = ["audio/webm", "audio/mp4", "audio/mpeg", "audio/wav"]


def get_voice_task_preflight(workspace, user):
    """Build the Voice Tasker availability payload for ``user`` in ``workspace``.

    The checks run in a fixed order and the first failing one decides the
    ``reason`` value:

    1. no ``WorkspaceAISettings`` row for the workspace -> ``"not_configured"``
    2. ``voice_tasker_enabled`` is falsy                -> ``"disabled"``
    3. no active credential with an encrypted key for
       the configured provider                          -> ``"missing_api_key"``
    4. access mode is admins-only and the user is not
       an active workspace admin                        -> ``"role_denied"``

    Args:
        workspace: Workspace instance the settings and credentials are looked up for.
        user: User whose workspace membership is checked in admins-only mode.

    Returns:
        dict with the keys ``available``, ``reason``, ``max_audio_duration_seconds``,
        ``accepted_mime_types`` and ``access_mode``. ``reason`` is ``None`` only
        when ``available`` is ``True``. The API key itself is never included.
    """
    response = {
        "available": False,
        "reason": "not_configured",
        "max_audio_duration_seconds": 120,
        # Hand out a copy so a caller mutating the payload cannot change the
        # module-level list that the parse endpoint validates against.
        "accepted_mime_types": list(VOICE_TASK_ACCEPTED_AUDIO_TYPES),
        "access_mode": "all_workspace_members",
    }

    ai_settings = WorkspaceAISettings.objects.filter(workspace=workspace).first()
    if not ai_settings:
        return response

    # From here on report the workspace's own limits, even if unavailable.
    response["max_audio_duration_seconds"] = ai_settings.max_audio_duration_seconds
    response["access_mode"] = ai_settings.access_mode

    if not ai_settings.voice_tasker_enabled:
        response["reason"] = "disabled"
        return response

    credential = WorkspaceAICredential.objects.filter(
        workspace=workspace,
        provider=ai_settings.provider,
        is_active=True,
    ).first()
    if not credential or not credential.encrypted_api_key:
        response["reason"] = "missing_api_key"
        return response

    if ai_settings.access_mode == WorkspaceAISettings.AccessMode.ADMINS_ONLY:
        # The membership lookup is only needed for the admins-only mode, so it
        # is issued here instead of on every call.
        workspace_member = WorkspaceMember.objects.filter(
            workspace=workspace,
            member=user,
            is_active=True,
        ).first()
        if not workspace_member or workspace_member.role != ROLE.ADMIN.value:
            response["reason"] = "role_denied"
            return response

    response["available"] = True
    response["reason"] = None
    return response
class WorkspaceAISettingsEndpoint(BaseAPIView):
def get_settings(self, slug):
@ -105,3 +151,81 @@ class WorkspaceAISettingsTestConnectionEndpoint(BaseAPIView):
},
status=status_code,
)
class VoiceTaskPreflightEndpoint(BaseAPIView):
    """Read-only endpoint that returns the Voice Tasker preflight payload."""

    @allow_permission(allowed_roles=[ROLE.ADMIN, ROLE.MEMBER, ROLE.GUEST], level="WORKSPACE")
    def get(self, request, slug):
        """Return the preflight payload for the requesting user with HTTP 200."""
        payload = get_voice_task_preflight(
            Workspace.objects.get(slug=slug),
            request.user,
        )
        return Response(payload, status=status.HTTP_200_OK)
class VoiceTaskParseEndpoint(BaseAPIView):
    """Accept a recorded audio clip for Voice Tasker and validate its metadata.

    The endpoint currently only validates the upload and echoes its metadata
    back with ``pipeline_status`` set to ``"pending_openai_pipeline"``.
    """

    parser_classes = (MultiPartParser, FormParser)

    @allow_permission(allowed_roles=[ROLE.ADMIN, ROLE.MEMBER, ROLE.GUEST], level="WORKSPACE")
    def post(self, request, slug):
        """Validate a multipart upload and acknowledge it.

        Expected form fields:
            audio: uploaded file whose content type is in
                ``VOICE_TASK_ACCEPTED_AUDIO_TYPES``.
            duration_seconds: positive number not above the workspace limit.
            client_context: optional JSON object encoded as a string.

        Returns:
            202 with the echoed audio metadata on success;
            403 with ``code="role_denied"`` when the preflight denies the role;
            400 with a specific ``code`` for every other failure.
        """
        # Function-scope import keeps this block self-contained.
        import math

        workspace = Workspace.objects.get(slug=slug)
        preflight = get_voice_task_preflight(workspace, request.user)
        if not preflight["available"]:
            response_status = (
                status.HTTP_403_FORBIDDEN if preflight["reason"] == "role_denied" else status.HTTP_400_BAD_REQUEST
            )
            return Response(
                {
                    "ok": False,
                    "code": preflight["reason"],
                    "error": "Voice Tasker is not available for this workspace.",
                },
                status=response_status,
            )

        audio = request.FILES.get("audio")
        if not audio:
            return Response(
                {"ok": False, "code": "missing_audio", "error": "Audio file is required."},
                status=status.HTTP_400_BAD_REQUEST,
            )
        if audio.content_type not in VOICE_TASK_ACCEPTED_AUDIO_TYPES:
            return Response(
                {"ok": False, "code": "unsupported_audio_type", "error": "Unsupported audio file type."},
                status=status.HTTP_400_BAD_REQUEST,
            )

        try:
            duration_seconds = float(request.data.get("duration_seconds", 0))
        except (TypeError, ValueError):
            duration_seconds = 0
        # float("nan") parses successfully but compares False against every
        # bound, so without this check it would slip past both range tests
        # below and end up in the JSON response.
        if math.isnan(duration_seconds) or duration_seconds <= 0:
            return Response(
                {"ok": False, "code": "invalid_duration", "error": "Audio duration is required."},
                status=status.HTTP_400_BAD_REQUEST,
            )
        if duration_seconds > preflight["max_audio_duration_seconds"]:
            return Response(
                {"ok": False, "code": "audio_too_long", "error": "Audio duration exceeds workspace limit."},
                status=status.HTTP_400_BAD_REQUEST,
            )

        client_context_raw = request.data.get("client_context") or "{}"
        try:
            client_context = json.loads(client_context_raw)
        except (TypeError, json.JSONDecodeError):
            client_context = {}
        # Valid JSON may still be a list, number or string; the context is
        # treated as an object, so anything else is handled like malformed input.
        if not isinstance(client_context, dict):
            client_context = {}

        return Response(
            {
                "ok": True,
                "status": "uploaded",
                "pipeline_status": "pending_openai_pipeline",
                "audio": {
                    "content_type": audio.content_type,
                    "duration_seconds": duration_seconds,
                    "size": audio.size,
                },
                "client_context": client_context,
            },
            status=status.HTTP_202_ACCEPTED,
        )

View File

@ -19,7 +19,7 @@ export default function WorkspaceLayout(props: Route.ComponentProps) {
<AuthenticationWrapper>
<WorkspaceAuthWrapper>
<AppRailVisibilityProvider>
<WorkspaceContentWrapper>
<WorkspaceContentWrapper workspaceSlug={workspaceSlug}>
<GlobalModals workspaceSlug={workspaceSlug} />
<Outlet />
</WorkspaceContentWrapper>

View File

@ -10,13 +10,16 @@ import { observer } from "mobx-react";
import { cn } from "@plane/utils";
import { AppRailRoot } from "@/components/navigation";
import { useAppRailVisibility } from "@/lib/app-rail";
import { VoiceTaskerGlobalControl } from "@/components/voice-tasker/global-control";
// local imports
import { TopNavigationRoot } from "../navigations";
export const WorkspaceContentWrapper = observer(function WorkspaceContentWrapper({
children,
workspaceSlug,
}: {
children: React.ReactNode;
workspaceSlug?: string;
}) {
// Use the context to determine if app rail should render
const { shouldRenderAppRail } = useAppRailVisibility();
@ -37,6 +40,7 @@ export const WorkspaceContentWrapper = observer(function WorkspaceContentWrapper
>
{children}
</div>
{workspaceSlug && <VoiceTaskerGlobalControl workspaceSlug={workspaceSlug} />}
</div>
</div>
);

View File

@ -0,0 +1,320 @@
/**
* Copyright (c) 2023-present Plane Software, Inc. and contributors
* SPDX-License-Identifier: AGPL-3.0-only
* See the LICENSE file for details.
*/
import { useCallback, useEffect, useMemo, useRef, useState } from "react";
import useSWR from "swr";
import { Mic, RotateCcw, Square, Upload, X } from "lucide-react";
// plane imports
import { Button } from "@plane/propel/button";
import { Tooltip } from "@plane/propel/tooltip";
import { TOAST_TYPE, setToast } from "@plane/propel/toast";
import { EModalPosition, EModalWidth, ModalCore } from "@plane/ui";
import { cn } from "@plane/utils";
// services
import { WorkspaceAIService } from "@/services/workspace-ai.service";
// Service instance used by the component in this file for the preflight and upload requests.
const workspaceAIService = new WorkspaceAIService();
// UI state of the recording modal.
type TVoiceTaskerStatus = "idle" | "recording" | "uploading" | "success" | "error";
// Tooltip text for the microphone button when Voice Task is unavailable, keyed by the preflight `reason`.
const UNAVAILABLE_LABELS = {
disabled: "AI-функции не активированы для этого workspace",
missing_api_key: "OpenAI key не сохранен для этого workspace",
not_configured: "AI-функции не настроены для этого workspace",
role_denied: "Voice Task недоступен для вашей роли",
} as const;
/**
 * Pick the first recording MIME type the current MediaRecorder supports.
 * Returns an empty string when MediaRecorder is missing or none of the
 * preferred types is supported.
 */
function getSupportedMimeType() {
  if (typeof MediaRecorder === "undefined") return "";
  // Ordered by preference.
  for (const mimeType of ["audio/webm;codecs=opus", "audio/webm", "audio/mp4"]) {
    if (MediaRecorder.isTypeSupported(mimeType)) return mimeType;
  }
  return "";
}
/**
 * Format a duration in seconds as `m:ss`.
 * Fractions are dropped and negative values are shown as `0:00`.
 */
function formatDuration(seconds: number) {
  const wholeSeconds = Math.max(0, Math.floor(seconds));
  const secondsPart = String(wholeSeconds % 60).padStart(2, "0");
  const minutesPart = Math.floor(wholeSeconds / 60);
  return `${minutesPart}:${secondsPart}`;
}
type Props = {
  /** Slug of the workspace whose Voice Task endpoints are called. */
  workspaceSlug: string;
};

/**
 * Floating microphone button plus the modal used to record a short audio clip
 * and upload it to the workspace's voice-task parse endpoint.
 *
 * The button is disabled while the preflight response reports Voice Task as
 * unavailable; the tooltip then carries the matching `UNAVAILABLE_LABELS` text.
 */
export function VoiceTaskerGlobalControl({ workspaceSlug }: Props) {
  const [isOpen, setIsOpen] = useState(false);
  const [status, setStatus] = useState<TVoiceTaskerStatus>("idle");
  const [duration, setDuration] = useState(0);
  const [audioBlob, setAudioBlob] = useState<Blob | null>(null);
  const [audioUrl, setAudioUrl] = useState<string | null>(null);
  const [error, setError] = useState<string | null>(null);
  const mediaRecorderRef = useRef<MediaRecorder | null>(null);
  const streamRef = useRef<MediaStream | null>(null);
  const chunksRef = useRef<BlobPart[]>([]);
  const timerRef = useRef<number | null>(null);
  const startedAtRef = useRef(0);

  // Preflight is re-fetched every 30 seconds.
  const { data: preflight } = useSWR(
    workspaceSlug ? `VOICE_TASK_PREFLIGHT_${workspaceSlug}` : null,
    workspaceSlug ? () => workspaceAIService.retrieveVoiceTaskPreflight(workspaceSlug) : null,
    { refreshInterval: 30000 }
  );

  const maxDuration = preflight?.max_audio_duration_seconds ?? 120;
  const isAvailable = !!preflight?.available;
  const isRecording = status === "recording";
  const isUploading = status === "uploading";

  const tooltipContent = useMemo(() => {
    if (!preflight) return "Voice Task";
    if (preflight.available) return "Voice Task";
    return UNAVAILABLE_LABELS[preflight.reason ?? "not_configured"];
  }, [preflight]);

  const clearTimer = useCallback(() => {
    if (timerRef.current) {
      window.clearInterval(timerRef.current);
      timerRef.current = null;
    }
  }, []);

  const stopStream = useCallback(() => {
    streamRef.current?.getTracks().forEach((track) => track.stop());
    streamRef.current = null;
  }, []);

  // Stop and KEEP the recording: the recorder's `onstop` handler builds the blob
  // and releases the stream.
  const stopRecording = useCallback(() => {
    const recorder = mediaRecorderRef.current;
    clearTimer();
    if (recorder && recorder.state === "recording") {
      recorder.stop();
      return;
    }
    stopStream();
  }, [clearTimer, stopStream]);

  // Stop and DROP the recording. The handlers are detached before stopping so a
  // late `onstop` cannot bring a discarded recording back as `audioBlob` (or set
  // state after unmount); the stream is therefore released here directly.
  const discardRecorder = useCallback(() => {
    const recorder = mediaRecorderRef.current;
    mediaRecorderRef.current = null;
    if (recorder) {
      recorder.ondataavailable = null;
      recorder.onstop = null;
      if (recorder.state !== "inactive") recorder.stop();
    }
    chunksRef.current = [];
    clearTimer();
    stopStream();
  }, [clearTimer, stopStream]);

  const resetRecording = useCallback(() => {
    discardRecorder();
    setAudioBlob(null);
    setAudioUrl(null);
    setDuration(0);
    setError(null);
    setStatus("idle");
  }, [discardRecorder]);

  const handleClose = useCallback(() => {
    resetRecording();
    setIsOpen(false);
  }, [resetRecording]);

  // Release the recorder, timer and microphone when the component unmounts.
  useEffect(
    () => () => {
      discardRecorder();
    },
    [discardRecorder]
  );

  // Keep an object URL for the current blob and revoke it when the blob changes.
  useEffect(() => {
    if (!audioBlob) {
      setAudioUrl(null);
      return;
    }

    const objectUrl = URL.createObjectURL(audioBlob);
    setAudioUrl(objectUrl);

    return () => URL.revokeObjectURL(objectUrl);
  }, [audioBlob]);

  const startRecording = async () => {
    if (
      typeof navigator === "undefined" ||
      !navigator.mediaDevices?.getUserMedia ||
      typeof MediaRecorder === "undefined"
    ) {
      setError("Браузер не поддерживает запись аудио.");
      setStatus("error");
      return;
    }

    try {
      resetRecording();
      const stream = await navigator.mediaDevices.getUserMedia({
        audio: {
          echoCancellation: true,
          noiseSuppression: true,
        },
      });
      const mimeType = getSupportedMimeType();
      const recorder = new MediaRecorder(stream, mimeType ? { mimeType } : undefined);

      chunksRef.current = [];
      streamRef.current = stream;
      mediaRecorderRef.current = recorder;

      recorder.ondataavailable = (event) => {
        if (event.data.size > 0) chunksRef.current.push(event.data);
      };
      recorder.onstop = () => {
        const type = recorder.mimeType || mimeType || "audio/webm";
        setAudioBlob(new Blob(chunksRef.current, { type }));
        setStatus("idle");
        stopStream();
      };

      recorder.start();
      startedAtRef.current = Date.now();
      setDuration(0);
      setError(null);
      setStatus("recording");
      timerRef.current = window.setInterval(() => {
        const elapsed = (Date.now() - startedAtRef.current) / 1000;
        // The tick that triggers the auto-stop always lands slightly past the
        // limit; clamp so the stored duration never exceeds what the backend accepts.
        setDuration(Math.min(elapsed, maxDuration));
        if (elapsed >= maxDuration) stopRecording();
      }, 250);
    } catch {
      setError("Не удалось получить доступ к микрофону.");
      setStatus("error");
      stopStream();
      clearTimer();
    }
  };

  const uploadAudio = async () => {
    if (!audioBlob) return;

    setStatus("uploading");
    setError(null);

    const audioType = audioBlob.type || "audio/webm";
    const extension = audioType.includes("mp4") ? "m4a" : "webm";
    // Round up to whole seconds (minimum 1) but never report more than the
    // workspace limit, otherwise an auto-stopped recording is rejected as too long.
    const reportedDuration = Math.max(1, Math.min(Math.ceil(duration), Math.floor(maxDuration)));
    const formData = new FormData();
    formData.append("audio", audioBlob, `voice-task.${extension}`);
    formData.append("duration_seconds", String(reportedDuration));
    formData.append(
      "client_context",
      JSON.stringify({
        current_page: window.location.pathname,
        locale: navigator.language,
        timezone: Intl.DateTimeFormat().resolvedOptions().timeZone,
      })
    );

    try {
      await workspaceAIService.uploadVoiceTaskAudio(workspaceSlug, formData);
      setStatus("success");
      setToast({
        type: TOAST_TYPE.SUCCESS,
        title: "Аудио отправлено",
        message: "Backend принял запись. Распознавание будет подключено следующим этапом.",
      });
    } catch (err) {
      const message =
        typeof err === "object" && err && "error" in err ? String(err.error) : "Не удалось отправить аудио.";
      setError(message);
      setStatus("error");
      setToast({
        type: TOAST_TYPE.ERROR,
        title: "Voice Task не отправлен",
        message,
      });
    }
  };

  return (
    <>
      <div className="pointer-events-none fixed right-4 z-[29] bottom-[calc(var(--nodedc-bottom-dock-offset,0px)+1rem)]">
        <Tooltip tooltipContent={tooltipContent} position="left">
          <button
            type="button"
            className={cn(
              "pointer-events-auto flex size-11 items-center justify-center rounded-full border-[0.5px] shadow-lg transition",
              isAvailable
                ? "border-pink-500/40 bg-pink-500 text-white hover:bg-pink-600"
                : "cursor-not-allowed border-subtle bg-layer-2 text-tertiary"
            )}
            disabled={!isAvailable}
            onClick={() => setIsOpen(true)}
          >
            <Mic className="size-5" />
          </button>
        </Tooltip>
      </div>

      <ModalCore isOpen={isOpen} handleClose={handleClose} position={EModalPosition.CENTER} width={EModalWidth.MD}>
        <div className="px-5 py-4">
          <div className="flex items-start justify-between gap-4">
            <div>
              <h3 className="text-18 font-medium text-primary">Voice Task</h3>
              <p className="mt-1 text-13 text-secondary">Запись до {maxDuration} секунд</p>
            </div>
            <button
              type="button"
              className="flex size-8 items-center justify-center rounded-md text-tertiary hover:bg-layer-2 hover:text-primary"
              onClick={handleClose}
            >
              <X className="size-4" />
            </button>
          </div>

          <div className="mt-5 rounded-lg border-[0.5px] border-subtle bg-layer-1 p-4">
            <div className="flex items-center justify-between gap-4">
              <div>
                <div className="text-24 font-semibold text-primary">{formatDuration(duration)}</div>
                <div className="mt-1 text-12 text-tertiary">
                  {status === "success" ? "Audio uploaded" : isRecording ? "Recording" : "Ready"}
                </div>
              </div>
              <div
                className={cn(
                  "flex size-14 items-center justify-center rounded-full",
                  isRecording ? "bg-red-500/15 text-red-500" : "bg-pink-500/10 text-pink-500"
                )}
              >
                <Mic className={cn("size-6", { "animate-pulse": isRecording })} />
              </div>
            </div>

            {audioUrl && !isRecording && (
              <audio controls src={audioUrl} className="mt-4 w-full">
                <track kind="captions" />
              </audio>
            )}

            {error && (
              <div className="mt-4 rounded-md border-[0.5px] border-red-500/30 bg-red-500/10 px-3 py-2 text-12 text-red-500">
                {error}
              </div>
            )}
          </div>

          <div className="mt-5 flex flex-wrap justify-end gap-2">
            {audioBlob && !isRecording && (
              <Button variant="secondary" size="lg" onClick={resetRecording} disabled={isUploading}>
                <RotateCcw className="mr-2 size-4" />
                Перезаписать
              </Button>
            )}
            <Button
              variant={isRecording ? "error-fill" : "secondary"}
              size="lg"
              onClick={isRecording ? stopRecording : startRecording}
              disabled={isUploading}
            >
              {isRecording ? <Square className="mr-2 size-4" /> : <Mic className="mr-2 size-4" />}
              {isRecording ? "Стоп" : "Записать"}
            </Button>
            <Button
              variant="primary"
              size="lg"
              onClick={uploadAudio}
              loading={isUploading}
              disabled={!audioBlob || isRecording}
            >
              <Upload className="mr-2 size-4" />
              Отправить
            </Button>
          </div>
        </div>
      </ModalCore>
    </>
  );
}

View File

@ -6,6 +6,8 @@
import { API_BASE_URL } from "@plane/constants";
import type {
TVoiceTaskPreflight,
TVoiceTaskUploadResult,
TWorkspaceAIConnectionTestResult,
TWorkspaceAISettings,
TWorkspaceAISettingsPayload,
@ -43,4 +45,20 @@ export class WorkspaceAIService extends APIService {
throw error?.response?.data;
});
}
/**
 * Fetch the Voice Task preflight payload for a workspace.
 * Resolves with the response body; rejects with the error response body.
 */
async retrieveVoiceTaskPreflight(workspaceSlug: string): Promise<TVoiceTaskPreflight> {
  const endpoint = `/api/workspaces/${workspaceSlug}/voice-task/preflight/`;
  return this.get(endpoint).then(
    (response) => response?.data,
    (error) => {
      throw error?.response?.data;
    }
  );
}
/**
 * Upload a recorded clip (multipart form data) to the voice-task parse endpoint.
 * Resolves with the response body; rejects with the error response body.
 */
async uploadVoiceTaskAudio(workspaceSlug: string, data: FormData): Promise<TVoiceTaskUploadResult> {
  const endpoint = `/api/workspaces/${workspaceSlug}/voice-task/parse/`;
  return this.post(endpoint, data).then(
    (response) => response?.data,
    (error) => {
      throw error?.response?.data;
    }
  );
}
}

View File

@ -65,3 +65,27 @@ export type TWorkspaceAIConnectionTestResult = {
code?: string;
error?: string;
};
/** Why Voice Task is unavailable; `null` when it is available. */
export type TVoiceTaskPreflightReason = "not_configured" | "disabled" | "missing_api_key" | "role_denied" | null;
/** Response body of the voice-task preflight endpoint. */
export type TVoiceTaskPreflight = {
// Whether the current user may use Voice Task in this workspace.
available: boolean;
// Reason for unavailability, `null` when `available` is true.
reason: TVoiceTaskPreflightReason;
// Longest recording, in seconds, that the parse endpoint accepts.
max_audio_duration_seconds: number;
// Upload content types that the parse endpoint accepts.
accepted_mime_types: string[];
// Access mode configured for the workspace.
access_mode: TWorkspaceAIAccessMode;
};
/** Response body of the voice-task parse endpoint, for both success and failure. */
export type TVoiceTaskUploadResult = {
// True when the upload was accepted.
ok: boolean;
// Present on success.
status?: "uploaded";
// Present on success.
pipeline_status?: "pending_openai_pipeline";
// Echo of the accepted upload's metadata; present on success.
audio?: {
content_type: string;
duration_seconds: number;
size: number;
};
// Echo of the submitted client context; present on success.
client_context?: Record<string, unknown>;
// Machine-readable failure code; present on failure.
code?: string;
// Human-readable failure message; present on failure.
error?: string;
};