init
This commit is contained in:
+165
@@ -0,0 +1,165 @@
|
||||
import requests
|
||||
|
||||
# Base URL of the MiniMax REST API; endpoint paths are appended to it.
API_ENDPOINT = "https://api.minimaxi.com/v1"
|
||||
|
||||
|
||||
class MiniMaxBaseTool:
    """Thin HTTP client for the MiniMax REST endpoints used by the tools.

    Every public method issues exactly one HTTP request and returns the raw
    ``requests.Response``; checking the HTTP status and the payload is left
    to the caller.
    """

    # (connect, read) timeout in seconds, applied when the caller does not
    # pass its own ``timeout``.  Without one, ``requests`` waits indefinitely
    # on an unresponsive server.
    DEFAULT_TIMEOUT = (10, 300)

    def __init__(self, api_key: str, group_id: str):
        """Store the credentials used by every request.

        Args:
            api_key: Token sent as ``Authorization: Bearer <api_key>``.
            group_id: Value sent as the ``GroupId`` query parameter by the
                file endpoints.

        Raises:
            ValueError: If ``api_key`` or ``group_id`` is empty or ``None``.
        """
        self.api_key = api_key
        self.group_id = group_id
        if not self.api_key:
            raise ValueError("Api key are required")
        if not self.group_id:
            raise ValueError("Group id are required")

    def _get_headers(self) -> dict:
        """Return the headers carrying the bearer token."""
        return {"Authorization": f"Bearer {self.api_key}"}

    def _request(self, method: str, url: str, **kwargs) -> requests.Response:
        """Send one authenticated request.

        Args:
            method: HTTP verb, e.g. ``"GET"`` or ``"POST"``.
            url: Absolute URL of the endpoint.
            **kwargs: Forwarded to ``requests.request`` (``json``, ``data``,
                ``params``, ``files``, ``timeout`` ...).

        Returns:
            The raw response; no status check is performed here.
        """
        # An explicit ``timeout`` from the caller wins over the default.
        kwargs.setdefault("timeout", self.DEFAULT_TIMEOUT)
        return requests.request(method, url, headers=self._get_headers(), **kwargs)

    def text_to_image(
        self,
        model: str,
        prompt: str,
        aspect_ratio: str,
        response_format: str,
        prompt_optimizer: bool,
        n: int,
    ) -> requests.Response:
        """POST a JSON image-generation request to ``/image_generation``."""
        return self._request(
            "POST",
            f"{API_ENDPOINT}/image_generation",
            json={
                "model": model,
                "prompt": prompt,
                "aspect_ratio": aspect_ratio,
                "response_format": response_format,
                "prompt_optimizer": prompt_optimizer,
                "n": n,
            },
        )

    def music_upload(
        self, file_name: str, file_blob: bytes, mime_type: str
    ) -> requests.Response:
        """Upload a song as multipart form data to ``/music_upload``.

        The form field ``purpose`` is always ``"song"``.
        """
        files = [("file", (file_name, file_blob, mime_type))]
        return self._request(
            "POST",
            f"{API_ENDPOINT}/music_upload",
            data={"purpose": "song"},
            files=files,
        )

    def music_generation(
        self,
        model: str,
        refer_voice: str,
        refer_instrumental: str,
        refer_vocal: str,
        lyrics: str,
    ) -> requests.Response:
        """POST a JSON music-generation request to ``/music_generation``."""
        return self._request(
            "POST",
            f"{API_ENDPOINT}/music_generation",
            json={
                "model": model,
                "refer_voice": refer_voice,
                "refer_instrumental": refer_instrumental,
                "refer_vocal": refer_vocal,
                "lyrics": lyrics,
            },
        )

    def file_upload(
        self, file_name: str, file_blob: bytes, mime_type: str, purpose: str
    ) -> requests.Response:
        """Upload a file as multipart form data to ``/files/upload``.

        ``GroupId`` goes in the query string, ``purpose`` in the form body.
        """
        files = [("file", (file_name, file_blob, mime_type))]
        return self._request(
            "POST",
            f"{API_ENDPOINT}/files/upload",
            params={"GroupId": self.group_id},
            data={"purpose": purpose},
            files=files,
        )

    def voice_clone(
        self,
        model: str,
        file_id: str,
        voice_id: str,
        text: str,
        accuracy: float,
        need_noise_reduction: bool,
        need_volume_normalization: bool,
    ) -> requests.Response:
        """POST a JSON voice-clone request to ``/voice_clone``."""
        return self._request(
            "POST",
            f"{API_ENDPOINT}/voice_clone",
            json={
                "model": model,
                "file_id": file_id,
                "voice_id": voice_id,
                "text": text,
                "accuracy": accuracy,
                "need_noise_reduction": need_noise_reduction,
                "need_volume_normalization": need_volume_normalization,
            },
        )

    def video_generation_task(self, task_id: str) -> requests.Response:
        """GET the state of a video task from ``/query/video_generation``."""
        return self._request(
            "GET",
            f"{API_ENDPOINT}/query/video_generation",
            params={"task_id": task_id},
        )

    def video_generation(
        self,
        model: str,
        prompt: str,
        prompt_optimizer: bool,
        duration: int,
        resolution: str,
        first_frame_image: str,
    ) -> requests.Response:
        """POST a video-generation request to ``/video_generation``."""
        # NOTE(review): this is the only non-upload POST in this class that
        # sends a form-encoded body (``data=``); the sibling endpoints send
        # JSON (``json=``).  Kept unchanged -- confirm against the MiniMax
        # API reference before switching.
        return self._request(
            "POST",
            f"{API_ENDPOINT}/video_generation",
            data={
                "model": model,
                "prompt": prompt,
                "prompt_optimizer": prompt_optimizer,
                "duration": duration,
                "resolution": resolution,
                "first_frame_image": first_frame_image,
            },
        )

    def file_retrieve(self, file_id: str) -> requests.Response:
        """GET the metadata of one file from ``/files/retrieve``."""
        return self._request(
            "GET",
            f"{API_ENDPOINT}/files/retrieve",
            params={"GroupId": self.group_id, "file_id": file_id},
        )

    def file_list(self, purpose: str) -> requests.Response:
        """GET the files stored for ``purpose`` from ``/files/list``."""
        return self._request(
            "GET",
            f"{API_ENDPOINT}/files/list",
            params={"GroupId": self.group_id, "purpose": purpose},
        )
|
||||
@@ -0,0 +1,51 @@
|
||||
from collections.abc import Generator
|
||||
from typing import Any
|
||||
from dify_plugin import Tool
|
||||
from dify_plugin.entities.tool import ToolInvokeMessage
|
||||
from tools.base import MiniMaxBaseTool
|
||||
|
||||
|
||||
class MiniMaxImageGenerationTool(Tool):
    """Dify tool that turns a text prompt into images via MiniMax."""

    @staticmethod
    def _json_object(response) -> dict:
        """Return the body as a dict, or ``{}`` if it is not a JSON object."""
        try:
            body = response.json()
        except ValueError:
            return {}
        return body if isinstance(body, dict) else {}

    def _invoke(self, tool_parameters: dict[str, Any]) -> Generator[ToolInvokeMessage]:
        """Request the images and yield one image message per returned URL.

        Args:
            tool_parameters: Reads ``model``, ``prompt``, ``aspect_ratio``,
                ``prompt_optimizer`` and ``n``.

        Yields:
            On success, an image message per URL followed by a JSON message
            ``{"image_urls": [...]}``.  On any failure, a single text message
            describing it.
        """
        credentials = self.runtime.credentials
        minimax = MiniMaxBaseTool(
            api_key=credentials.get("api_key"),
            group_id=credentials.get("group_id"),
        )

        response = minimax.text_to_image(
            model=tool_parameters.get("model"),
            prompt=tool_parameters.get("prompt"),
            aspect_ratio=tool_parameters.get("aspect_ratio"),
            response_format="url",
            # The parameter may be absent; send an explicit boolean instead
            # of a JSON null.
            prompt_optimizer=bool(tool_parameters.get("prompt_optimizer")),
            n=tool_parameters.get("n"),
        )
        if response.status_code != 200:
            yield self.create_text_message(
                f"Image generation failed {response.status_code} {response.text}"
            )
            return

        # Parse once.  ``or {}`` also covers keys that are present but null,
        # which ``dict.get(key, {})`` would hand back as None.
        body = self._json_object(response)
        status_code = (body.get("base_resp") or {}).get("status_code", -1)
        if status_code != 0:
            yield self.create_text_message(f"Image generation failed {response.text}")
            return

        image_urls = (body.get("data") or {}).get("image_urls")
        if not image_urls:
            yield self.create_text_message(f"Image generation failed {response.text}")
            return

        for image_url in image_urls:
            yield self.create_image_message(image_url)

        yield self.create_json_message({"image_urls": image_urls})
|
||||
@@ -0,0 +1,102 @@
|
||||
identity:
|
||||
name: image_generation
|
||||
author: quicksandzn
|
||||
label:
|
||||
en_US: Image Generation
|
||||
zh_Hans: 图片生成
|
||||
description:
|
||||
human:
|
||||
en_US: Image Generation
|
||||
zh_Hans: 图片生成
|
||||
llm: Generate images using text prompt words
|
||||
parameters:
|
||||
- name: model
|
||||
type: select
|
||||
required: true
|
||||
options:
|
||||
- value: image-01
|
||||
label:
|
||||
en_US: image-01
|
||||
zh_Hans: image-01
|
||||
- value: image-01-live
|
||||
label:
|
||||
en_US: image-01-live
|
||||
zh_Hans: image-01-live
|
||||
default: image-01
|
||||
label:
|
||||
en_US: Model Name
|
||||
zh_Hans: 模型名称
|
||||
human_description:
|
||||
en_US: Model Name
|
||||
zh_CN: 模型名称
|
||||
form: form
|
||||
- name: prompt
|
||||
type: string
|
||||
required: true
|
||||
label:
|
||||
en_US: Prompt
|
||||
zh_Hans: 文本提示词
|
||||
human_description:
|
||||
en_US: Generate the description of the image.
|
||||
zh_CN: 生成图像的描述
|
||||
form: llm
|
||||
- name: aspect_ratio
|
||||
type: select
|
||||
required: false
|
||||
options:
|
||||
- value: "1:1"
|
||||
label:
|
||||
en_US: "1:1"
|
||||
zh_Hans: "1:1"
|
||||
- value: "16:9"
|
||||
label:
|
||||
en_US: "16:9"
|
||||
zh_Hans: "16:9"
|
||||
- value: "4:3"
|
||||
label:
|
||||
en_US: "4:3"
|
||||
zh_Hans: "4:3"
|
||||
- value: "3:2"
|
||||
label:
|
||||
en_US: "3:2"
|
||||
zh_Hans: "3:2"
|
||||
- value: "2:3"
|
||||
label:
|
||||
en_US: "2:3"
|
||||
zh_Hans: "2:3"
|
||||
- value: "3:4"
|
||||
label:
|
||||
en_US: "3:4"
|
||||
zh_Hans: "3:4"
|
||||
- value: "9:16"
|
||||
label:
|
||||
en_US: "9:16"
|
||||
zh_Hans: "9:16"
|
||||
- value: "21:9"
|
||||
label:
|
||||
en_US: "21:9"
|
||||
zh_Hans: "21:9"
|
||||
default: "1:1"
|
||||
label:
|
||||
en_US: Aspect Ratio
|
||||
zh_Hans: 宽高比
|
||||
human_description:
|
||||
en_US: Used to control the aspect ratio of the generated image
|
||||
zh_CN: 用于控制生成图像的宽高比
|
||||
form: form
|
||||
- name: n
|
||||
type: number
|
||||
required: false
|
||||
default: 1
|
||||
max: 9
|
||||
min: 1
|
||||
label:
|
||||
en_US: Number of images
|
||||
zh_Hans: 生成数量
|
||||
human_description:
|
||||
en_US: Used to control the number of images generated in a single request
|
||||
zh_CN: 用于控制单次请求生成的图片数量
|
||||
form: form
|
||||
extra:
|
||||
python:
|
||||
source: tools/image_generation.py
|
||||
@@ -0,0 +1,65 @@
|
||||
from collections.abc import Generator
|
||||
from typing import Any
|
||||
from dify_plugin import Tool
|
||||
from dify_plugin.entities.tool import ToolInvokeMessage
|
||||
from tools.base import MiniMaxBaseTool
|
||||
|
||||
|
||||
class MiniMaxMusicGenerationTool(Tool):
    """Dify tool that generates music from a reference song via MiniMax."""

    @staticmethod
    def _json_object(response) -> dict:
        """Return the body as a dict, or ``{}`` if it is not a JSON object."""
        try:
            body = response.json()
        except ValueError:
            return {}
        return body if isinstance(body, dict) else {}

    def _invoke(self, tool_parameters: dict[str, Any]) -> Generator[ToolInvokeMessage]:
        """Upload the reference song, generate music and yield the audio.

        Args:
            tool_parameters: Reads ``model``, ``song`` (an object exposing
                ``filename``, ``blob`` and ``mime_type``), ``refer_vocal``
                and ``lyrics``.

        Yields:
            On success, a text message followed by a blob message holding
            the decoded audio (``audio/mpeg``).  On any failure, a single
            text message describing it.
        """
        credentials = self.runtime.credentials
        minimax = MiniMaxBaseTool(
            api_key=credentials.get("api_key"),
            group_id=credentials.get("group_id"),
        )

        model = tool_parameters.get("model")
        song_file = tool_parameters.get("song")
        refer_vocal = tool_parameters.get("refer_vocal")
        lyrics = tool_parameters.get("lyrics")

        if song_file is None:
            yield self.create_text_message(
                "Music generation upload failed: song file is required"
            )
            return

        # Step 1: upload the song to obtain the voice / instrumental ids.
        upload_response = minimax.music_upload(
            file_name=song_file.filename,
            file_blob=song_file.blob,
            mime_type=song_file.mime_type,
        )
        if upload_response.status_code != 200:
            yield self.create_text_message(
                f"Music generation upload failed {upload_response.status_code} {upload_response.text}"
            )
            return

        upload_data = self._json_object(upload_response)
        voice_id = upload_data.get("voice_id")
        instrumental_id = upload_data.get("instrumental_id")
        if not voice_id or not instrumental_id:
            yield self.create_text_message(
                f"Music generation upload failed {upload_response.text}"
            )
            return

        # Step 2: generate the music from the uploaded references.
        gen_response = minimax.music_generation(
            model=model,
            refer_voice=voice_id,
            refer_instrumental=instrumental_id,
            # Empty string and None are both sent as None.
            refer_vocal=refer_vocal or None,
            lyrics=lyrics,
        )
        if gen_response.status_code != 200:
            yield self.create_text_message(
                f"Music generation failed {gen_response.status_code} {gen_response.text}"
            )
            return

        # ``or {}`` also covers keys that are present but null.
        gen_data = self._json_object(gen_response)
        status_code = (gen_data.get("base_resp") or {}).get("status_code", -1)
        if status_code != 0:
            yield self.create_text_message(
                f"Music generation failed {gen_response.text}"
            )
            return

        audio_hex = (gen_data.get("data") or {}).get("audio")
        if not audio_hex:
            yield self.create_text_message(
                f"Music generation failed {gen_response.text}"
            )
            return

        # Decode before announcing success so a malformed payload is reported
        # as a failure instead of raising from inside the generator.
        try:
            audio = bytes.fromhex(audio_hex)
        except (TypeError, ValueError):
            yield self.create_text_message(
                "Music generation failed: audio payload is not valid hex"
            )
            return

        # The original built this message inside a discarded tuple and never
        # yielded it.
        yield self.create_text_message("Audio generated successfully")
        yield self.create_blob_message(blob=audio, meta={"mime_type": "audio/mpeg"})
|
||||
@@ -0,0 +1,64 @@
|
||||
identity:
|
||||
name: music_generation
|
||||
author: quicksandzn
|
||||
label:
|
||||
en_US: Music Generation
|
||||
zh_Hans: 音乐生成
|
||||
description:
|
||||
human:
|
||||
en_US: Music Generation
|
||||
zh_Hans: 音乐生成
|
||||
llm: Music Generation
|
||||
parameters:
|
||||
- name: model
|
||||
type: select
|
||||
required: true
|
||||
options:
|
||||
- value: music-01
|
||||
label:
|
||||
en_US: music-01
|
||||
zh_Hans: music-01
|
||||
default: music-01
|
||||
label:
|
||||
en_US: Model Name
|
||||
zh_Hans: 模型名称
|
||||
human_description:
|
||||
en_US: Model Name
|
||||
zh_CN: 模型名称
|
||||
form: form
|
||||
- name: song
|
||||
type: file
|
||||
required: true
|
||||
label:
|
||||
en_US: Song File
|
||||
zh_Hans: 歌曲文件
|
||||
human_description:
|
||||
en_US: Song File
|
||||
zh_CN: 歌曲文件
|
||||
form: llm
|
||||
- name: refer_vocal
|
||||
type: string
|
||||
required: false
|
||||
label:
|
||||
en_US: Voice ID
|
||||
zh_Hans: 声音ID
|
||||
human_description:
|
||||
en_US: The sound ID used to replace the generated music timbre when generating music
|
||||
zh_CN: 生成音乐时用来替换生成音乐音色的声音ID
|
||||
form: form
|
||||
- name: lyrics
|
||||
type: string
|
||||
required: false
|
||||
label:
|
||||
en_US: Lyrics
|
||||
zh_Hans: 歌词
|
||||
help:
|
||||
en_US: "Lyrics: Use line breaks (\\n) to separate each line of lyrics. Use two consecutive line breaks (\\n\\n) to add pauses in the middle of the lyrics. Use double hash marks (##) at the beginning and end to add accompaniment. Supports up to 200 characters (each Chinese character, punctuation mark, and letter counts as one character)."
|
||||
zh_CN: "歌词,使用换行符(\\n)分隔每行歌词,使用两个连续换行符(\\n\\n)可以在歌词中间添加停顿,使用双井号(##)添加在首尾可以添加伴奏,支持最长200字符(每个汉字、标点和字母都算1个字符)。"
|
||||
human_description:
|
||||
en_US: Lyrics
|
||||
zh_CN: 歌词
|
||||
form: llm
|
||||
extra:
|
||||
python:
|
||||
source: tools/music_generation.py
|
||||
@@ -0,0 +1,103 @@
|
||||
import time
|
||||
from collections.abc import Generator
|
||||
from typing import Any
|
||||
from dify_plugin import Tool
|
||||
from dify_plugin.entities.tool import ToolInvokeMessage
|
||||
from tools.base import MiniMaxBaseTool
|
||||
import logging
|
||||
from dify_plugin.config.logger_format import plugin_logger_handler
|
||||
|
||||
# Module-level logger that emits through the Dify plugin log handler.
logger = logging.getLogger(__name__)
|
||||
# Level is INFO, so logger.debug(...) records from this module are dropped.
logger.setLevel(logging.INFO)
|
||||
logger.addHandler(plugin_logger_handler)
|
||||
|
||||
|
||||
class MiniMaxVideoGenerationTool(Tool):
    """Dify tool that submits a MiniMax video task and waits for the result."""

    @staticmethod
    def _json_object(response) -> dict:
        """Return the body as a dict, or ``{}`` if it is not a JSON object."""
        try:
            body = response.json()
        except ValueError:
            return {}
        return body if isinstance(body, dict) else {}

    def _invoke(self, tool_parameters: dict[str, Any]) -> Generator[ToolInvokeMessage]:
        """Submit the task, poll it until it finishes, then yield the video URL.

        Args:
            tool_parameters: Reads ``model``, ``prompt``, ``prompt_optimizer``,
                ``duration``, ``resolution`` and ``first_frame_image``.

        Yields:
            Text messages reporting progress or failure; on success, a
            message carrying the download URL followed by a JSON message
            ``{"video_url": ...}``.
        """
        credentials = self.runtime.credentials
        minimax = MiniMaxBaseTool(
            api_key=credentials.get("api_key"),
            group_id=credentials.get("group_id"),
        )

        first_frame_image = tool_parameters.get("first_frame_image")
        response = minimax.video_generation(
            model=tool_parameters.get("model"),
            prompt=tool_parameters.get("prompt"),
            prompt_optimizer=tool_parameters.get("prompt_optimizer"),
            duration=tool_parameters.get("duration"),
            resolution=tool_parameters.get("resolution"),
            # Empty string and None are both sent as None.
            first_frame_image=first_frame_image or None,
        )
        if response.status_code != 200:
            yield self.create_text_message(
                f"Video generation failed {response.status_code} {response.text}"
            )
            return

        task_id = self._json_object(response).get("task_id")
        if not task_id:
            yield self.create_text_message(f"Video generation failed {response.text}")
            return
        yield self.create_text_message(f"Video generation task id {task_id}")

        # Poll at most ``max_retries`` times, ``interval`` seconds apart.
        max_retries = 100
        interval = 5
        video_file_id = None

        for _ in range(max_retries):
            time.sleep(interval)

            task_response = minimax.video_generation_task(task_id=task_id)
            if task_response.status_code != 200:
                yield self.create_text_message(
                    f"Video generation task failed {task_response.status_code} {task_response.text}"
                )
                break

            task_json = self._json_object(task_response)
            # ``or {}`` also covers a ``base_resp`` that is present but null.
            status_code = (task_json.get("base_resp") or {}).get("status_code", -1)
            if status_code != 0:
                yield self.create_text_message(
                    f"Video generation task failed {task_response.text}"
                )
                break

            task_status = task_json.get("status")
            # Compare case-insensitively and accept both "Fail" and "failed",
            # so a failed task ends the loop instead of being polled until
            # the retry budget runs out.
            normalized = task_status.lower() if isinstance(task_status, str) else ""

            if normalized == "success":
                video_file_id = task_json.get("file_id")
                yield self.create_text_message("Video generation task status Success")
                break
            if normalized in ("fail", "failed"):
                yield self.create_text_message(
                    f"Video generation task status failed {task_response.text}"
                )
                break

            # Preparing / Queueing / Processing (or anything unknown): retry.
            logger.debug("Video generation task status %s", task_status)

        if not video_file_id:
            yield self.create_text_message("Video generation failed")
            return

        file_response = minimax.file_retrieve(file_id=video_file_id)
        if file_response.status_code != 200:
            yield self.create_text_message(
                f"Video generation get file failed {file_response.status_code} {file_response.text}"
            )
            return

        file_info = self._json_object(file_response).get("file") or {}
        video_url = file_info.get("download_url")
        if not video_url:
            yield self.create_text_message(
                f"Video generation get file failed {file_response.text}"
            )
            return

        # NOTE(review): the video URL is emitted through create_image_message,
        # as in the original -- confirm this is the intended message type.
        yield self.create_image_message(video_url)
        yield self.create_json_message({"video_url": video_url})
|
||||
@@ -0,0 +1,126 @@
|
||||
identity:
|
||||
name: video_generation
|
||||
author: quicksandzn
|
||||
label:
|
||||
en_US: Video Generation
|
||||
zh_Hans: 视频生成
|
||||
description:
|
||||
human:
|
||||
en_US: Video Generation
|
||||
zh_Hans: 视频生成
|
||||
llm: Video Generation
|
||||
parameters:
|
||||
- name: model
|
||||
type: select
|
||||
required: true
|
||||
options:
|
||||
- value: MiniMax-Hailuo-02
|
||||
label:
|
||||
en_US: MiniMax-Hailuo-02
|
||||
zh_Hans: MiniMax-Hailuo-02
|
||||
- value: T2V-01-Director
|
||||
label:
|
||||
en_US: T2V-01-Director
|
||||
zh_Hans: T2V-01-Director
|
||||
- value: I2V-01-Director
|
||||
label:
|
||||
en_US: I2V-01-Director
|
||||
zh_Hans: I2V-01-Director
|
||||
- value: S2V-01
|
||||
label:
|
||||
en_US: S2V-01
|
||||
zh_Hans: S2V-01
|
||||
- value: I2V-01-live
|
||||
label:
|
||||
en_US: I2V-01-live
|
||||
zh_Hans: I2V-01-live
|
||||
- value: I2V-01
|
||||
label:
|
||||
en_US: I2V-01
|
||||
zh_Hans: I2V-01
|
||||
- value: T2V-01
|
||||
label:
|
||||
en_US: T2V-01
|
||||
zh_Hans: T2V-01
|
||||
default: MiniMax-Hailuo-02
|
||||
label:
|
||||
en_US: Model Name
|
||||
zh_Hans: 模型名称
|
||||
human_description:
|
||||
en_US: Model Name
|
||||
zh_CN: 模型名称
|
||||
form: form
|
||||
- name: prompt
|
||||
type: string
|
||||
required: false
|
||||
label:
|
||||
en_US: Prompt
|
||||
zh_Hans: 文本提示词
|
||||
human_description:
|
||||
en_US: Generate the description of the video.
|
||||
zh_CN: 生成视频的描述
|
||||
form: llm
|
||||
- name: prompt_optimizer
|
||||
type: boolean
|
||||
required: false
|
||||
default: true
|
||||
label:
|
||||
en_US: Prompt Optimizer
|
||||
zh_Hans: 提示词优化
|
||||
human_description:
|
||||
en_US: The model will automatically optimize the incoming prompt
|
||||
zh_CN: 模型会自动优化传入的prompt
|
||||
form: form
|
||||
- name: duration
|
||||
type: select
|
||||
required: true
|
||||
options:
|
||||
- value: "6"
|
||||
label:
|
||||
en_US: "6"
|
||||
zh_Hans: "6"
|
||||
- value: "10"
|
||||
label:
|
||||
en_US: "10"
|
||||
zh_Hans: "10"
|
||||
default: "6"
|
||||
label:
|
||||
en_US: Video Duration
|
||||
zh_Hans: 视频时长
|
||||
human_description:
|
||||
en_US: Generated video duration
|
||||
zh_CN: 生成视频时长
|
||||
form: form
|
||||
- name: resolution
|
||||
type: select
|
||||
required: true
|
||||
options:
|
||||
- value: 768P
|
||||
label:
|
||||
en_US: 768P
|
||||
zh_Hans: 768P
|
||||
- value: 1080P
|
||||
label:
|
||||
en_US: 1080P
|
||||
zh_Hans: 1080P
|
||||
default: 768P
|
||||
label:
|
||||
en_US: Resolution
|
||||
zh_Hans: 分辨率
|
||||
human_description:
|
||||
en_US: Resolution
|
||||
zh_CN: 分辨率
|
||||
form: form
|
||||
- name: first_frame_image
|
||||
type: string
|
||||
required: false
|
||||
label:
|
||||
en_US: First Frame Image
|
||||
zh_Hans: 首帧画面
|
||||
human_description:
|
||||
en_US: The model will generate a video based on the image passed in this parameter as the first frame
|
||||
zh_CN: 模型将以此参数中传入的图片为首帧画面来生成视频
|
||||
form: form
|
||||
extra:
|
||||
python:
|
||||
source: tools/video_generation.py
|
||||
@@ -0,0 +1,77 @@
|
||||
import uuid
|
||||
from collections.abc import Generator
|
||||
from typing import Any
|
||||
|
||||
from dify_plugin import Tool
|
||||
from dify_plugin.entities.tool import ToolInvokeMessage
|
||||
from tools.base import MiniMaxBaseTool
|
||||
|
||||
|
||||
class MiniMaxVoiceCloneTool(Tool):
    """Dify tool that clones a voice from a reference audio file via MiniMax."""

    @staticmethod
    def _json_object(response) -> dict:
        """Return the body as a dict, or ``{}`` if it is not a JSON object."""
        try:
            body = response.json()
        except ValueError:
            return {}
        return body if isinstance(body, dict) else {}

    def _invoke(self, tool_parameters: dict[str, Any]) -> Generator[ToolInvokeMessage]:
        """Upload the reference audio, clone the voice and report the result.

        Args:
            tool_parameters: Reads ``model``, ``ref_voice`` (an object
                exposing ``filename``, ``blob`` and ``mime_type``),
                ``voice_id``, ``text``, ``accuracy``,
                ``need_noise_reduction`` and ``need_volume_normalization``.

        Yields:
            On success, a text message with the demo audio value (only when
            the response contains one) followed by a JSON message
            ``{"voice_id": ..., "demo_audio": ...}``.  On any failure, a
            single text message describing it.
        """
        credentials = self.runtime.credentials
        minimax = MiniMaxBaseTool(
            api_key=credentials.get("api_key"),
            group_id=credentials.get("group_id"),
        )

        ref_voice = tool_parameters.get("ref_voice")
        if ref_voice is None:
            yield self.create_text_message(
                "Voice clone upload failed: reference voice file is required"
            )
            return

        # Generate an id when the caller did not supply one.
        voice_id = tool_parameters.get("voice_id") or f"voice_{uuid.uuid4()}"

        # Step 1: upload the reference audio to obtain its file id.
        upload_response = minimax.file_upload(
            file_name=ref_voice.filename,
            file_blob=ref_voice.blob,
            mime_type=ref_voice.mime_type,
            purpose="voice_clone",
        )
        if upload_response.status_code != 200:
            yield self.create_text_message(
                f"Voice clone upload failed {upload_response.status_code} {upload_response.text}"
            )
            return

        # ``or {}`` also covers a ``file`` key that is present but null.
        upload_data = self._json_object(upload_response).get("file") or {}
        file_id = upload_data.get("file_id")
        if not file_id:
            yield self.create_text_message(
                f"Voice clone upload failed {upload_response.text}"
            )
            return

        # Step 2: clone the voice from the uploaded file.
        clone_response = minimax.voice_clone(
            model=tool_parameters.get("model"),
            file_id=file_id,
            voice_id=voice_id,
            text=tool_parameters.get("text"),
            accuracy=tool_parameters.get("accuracy", 0.7),
            need_noise_reduction=tool_parameters.get("need_noise_reduction", False),
            need_volume_normalization=tool_parameters.get(
                "need_volume_normalization", False
            ),
        )
        if clone_response.status_code != 200:
            yield self.create_text_message(
                f"Voice clone failed {clone_response.status_code} {clone_response.text}"
            )
            return

        clone_data = self._json_object(clone_response)
        status_code = (clone_data.get("base_resp") or {}).get("status_code", -1)
        if status_code != 0:
            yield self.create_text_message(f"Voice clone failed {clone_response.text}")
            return

        demo_audio = clone_data.get("demo_audio")
        if demo_audio:
            yield self.create_text_message(demo_audio)

        yield self.create_json_message(
            {"voice_id": voice_id, "demo_audio": demo_audio}
        )
|
||||
@@ -0,0 +1,109 @@
|
||||
identity:
|
||||
name: voice_clone
|
||||
author: quicksandzn
|
||||
label:
|
||||
en_US: Voice Clone
|
||||
zh_Hans: 声音克隆
|
||||
description:
|
||||
human:
|
||||
en_US: Voice Clone
|
||||
zh_Hans: 声音克隆
|
||||
llm: Voice Clone
|
||||
parameters:
|
||||
- name: text
|
||||
type: string
|
||||
required: false
|
||||
label:
|
||||
en_US: Text
|
||||
zh_Hans: 试听文本
|
||||
human_description:
|
||||
en_US: Text
|
||||
zh_CN: 试听文本
|
||||
form: llm
|
||||
- name: model
|
||||
type: select
|
||||
required: true
|
||||
options:
|
||||
- value: speech-02-hd
|
||||
label:
|
||||
en_US: speech-02-hd
|
||||
zh_Hans: speech-02-hd
|
||||
- value: speech-02-turbo
|
||||
label:
|
||||
en_US: speech-02-turbo
|
||||
zh_Hans: speech-02-turbo
|
||||
- value: speech-01-hd
|
||||
label:
|
||||
en_US: speech-01-hd
|
||||
zh_Hans: speech-01-hd
|
||||
- value: speech-01-turbo
|
||||
label:
|
||||
en_US: speech-01-turbo
|
||||
zh_Hans: speech-01-turbo
|
||||
default: speech-02-hd
|
||||
label:
|
||||
en_US: Model Name
|
||||
zh_Hans: 模型名称
|
||||
human_description:
|
||||
en_US: Model Name
|
||||
zh_CN: 模型名称
|
||||
form: form
|
||||
- name: ref_voice
|
||||
type: file
|
||||
required: true
|
||||
label:
|
||||
en_US: Reference Voice File
|
||||
zh_Hans: 参考声音文件
|
||||
human_description:
|
||||
en_US: Reference Voice File
|
||||
zh_CN: 参考声音文件
|
||||
form: llm
|
||||
- name: voice_id
|
||||
type: string
|
||||
required: false
|
||||
label:
|
||||
en_US: Voice ID (Customize)
|
||||
zh_Hans: 声音ID (自定义)
|
||||
human_description:
|
||||
en_US: Voice ID, It will be automatically generated if it is empty
|
||||
zh_CN: 声音ID,如果为空自动生成
|
||||
form: form
|
||||
- name: accuracy
|
||||
type: number
|
||||
required: false
|
||||
min: 0.1
|
||||
max: 1
|
||||
default: 0.7
|
||||
label:
|
||||
en_US: Accuracy threshold
|
||||
zh_Hans: 文本校验准确率阈值
|
||||
human_description:
|
||||
en_US: Accuracy threshold
|
||||
zh_CN: 文本校验准确率阈值
|
||||
form: form
|
||||
- name: need_noise_reduction
|
||||
type: boolean
|
||||
required: false
|
||||
default: false
|
||||
label:
|
||||
en_US: Noise reduction
|
||||
zh_Hans: 是否开启降噪
|
||||
human_description:
|
||||
en_US: Noise reduction
|
||||
zh_CN: 是否开启降噪
|
||||
form: form
|
||||
- name: need_volume_normalization
|
||||
type: boolean
|
||||
required: false
|
||||
default: false
|
||||
label:
|
||||
en_US: Volume normalization
|
||||
zh_Hans: 是否开启音量归一化
|
||||
human_description:
|
||||
en_US: Volume normalization
|
||||
zh_CN: 是否开启音量归一化
|
||||
form: form
|
||||
|
||||
extra:
|
||||
python:
|
||||
source: tools/voice_clone.py
|
||||
Reference in New Issue
Block a user