This commit is contained in:
quicksandzn
2025-06-20 11:49:50 +08:00
commit a817a85316
27 changed files with 1502 additions and 0 deletions
+165
View File
@@ -0,0 +1,165 @@
import requests
# Base URL of the MiniMax v1 REST API; the client methods below append their
# endpoint path to it.
API_ENDPOINT = "https://api.minimaxi.com/v1"
class MiniMaxBaseTool:
    """Thin HTTP client for the MiniMax REST endpoints used by this plugin.

    Each public method sends exactly one request and returns the raw
    ``requests.Response``; checking the HTTP status and decoding the body is
    left to the caller.
    """

    # (connect, read) timeout in seconds, used when a caller does not supply
    # its own ``timeout``. ``requests`` applies no timeout by default, so a
    # stalled connection would otherwise block the tool forever.
    DEFAULT_TIMEOUT = (10, 300)

    def __init__(self, api_key: str, group_id: str):
        """Store the credentials used by every request.

        Args:
            api_key: Bearer token sent in the ``Authorization`` header.
            group_id: Sent as the ``GroupId`` query parameter by the file
                endpoints.

        Raises:
            ValueError: If ``api_key`` or ``group_id`` is empty.
        """
        if not api_key:
            raise ValueError("Api key are required")
        if not group_id:
            raise ValueError("Group id are required")
        self.api_key = api_key
        self.group_id = group_id

    def _get_headers(self) -> dict:
        """Return the headers attached to every request."""
        return {
            "Authorization": f"Bearer {self.api_key}",
        }

    def _request(self, method: str, url: str, **kwargs) -> requests.Response:
        """Send one authenticated request.

        ``kwargs`` are forwarded to ``requests.request``. A ``timeout`` passed
        by the caller wins; otherwise ``DEFAULT_TIMEOUT`` is applied.
        """
        kwargs.setdefault("timeout", self.DEFAULT_TIMEOUT)
        return requests.request(method, url, headers=self._get_headers(), **kwargs)

    def text_to_image(
        self,
        model: str,
        prompt: str,
        aspect_ratio: str,
        response_format: str,
        prompt_optimizer: bool,
        n: int,
    ) -> requests.Response:
        """POST a JSON image generation request to ``/image_generation``."""
        return self._request(
            "POST",
            f"{API_ENDPOINT}/image_generation",
            json={
                "model": model,
                "prompt": prompt,
                "aspect_ratio": aspect_ratio,
                "response_format": response_format,
                "prompt_optimizer": prompt_optimizer,
                "n": n,
            },
        )

    def music_upload(
        self, file_name: str, file_blob: bytes, mime_type: str
    ) -> requests.Response:
        """Upload a song to ``/music_upload`` as multipart form data.

        The file goes in the ``file`` part and ``purpose`` is fixed to
        ``"song"``.
        """
        return self._request(
            "POST",
            f"{API_ENDPOINT}/music_upload",
            data={"purpose": "song"},
            files=[("file", (file_name, file_blob, mime_type))],
        )

    def music_generation(
        self,
        model: str,
        refer_voice: str,
        refer_instrumental: str,
        refer_vocal: str,
        lyrics: str,
    ) -> requests.Response:
        """POST a JSON music generation request to ``/music_generation``."""
        return self._request(
            "POST",
            f"{API_ENDPOINT}/music_generation",
            json={
                "model": model,
                "refer_voice": refer_voice,
                "refer_instrumental": refer_instrumental,
                "refer_vocal": refer_vocal,
                "lyrics": lyrics,
            },
        )

    def file_upload(
        self, file_name: str, file_blob: bytes, mime_type: str, purpose: str
    ) -> requests.Response:
        """Upload a file to ``/files/upload`` as multipart form data.

        ``GroupId`` is passed as a query parameter and ``purpose`` as a form
        field next to the ``file`` part.
        """
        return self._request(
            "POST",
            f"{API_ENDPOINT}/files/upload",
            params={"GroupId": self.group_id},
            data={"purpose": purpose},
            files=[("file", (file_name, file_blob, mime_type))],
        )

    def voice_clone(
        self,
        model: str,
        file_id: str,
        voice_id: str,
        text: str,
        accuracy: float,
        need_noise_reduction: bool,
        need_volume_normalization: bool,
    ) -> requests.Response:
        """POST a JSON voice clone request to ``/voice_clone``."""
        return self._request(
            "POST",
            f"{API_ENDPOINT}/voice_clone",
            json={
                "model": model,
                "file_id": file_id,
                "voice_id": voice_id,
                "text": text,
                "accuracy": accuracy,
                "need_noise_reduction": need_noise_reduction,
                "need_volume_normalization": need_volume_normalization,
            },
        )

    def video_generation_task(self, task_id: str) -> requests.Response:
        """GET the state of a video generation task from ``/query/video_generation``."""
        return self._request(
            "GET",
            f"{API_ENDPOINT}/query/video_generation",
            params={"task_id": task_id},
        )

    def video_generation(
        self,
        model: str,
        prompt: str,
        prompt_optimizer: bool,
        duration: int,
        resolution: str,
        first_frame_image: str,
    ) -> requests.Response:
        """POST a JSON video generation request to ``/video_generation``.

        The body is sent as JSON, like every other generation endpoint in this
        class (it was previously form-encoded via ``data=``). Fields whose
        value is ``None`` are left out, which is what the form encoding did
        implicitly, and ``duration`` is coerced to ``int`` because callers may
        hand over the string value of a select option.

        Raises:
            ValueError: If ``duration`` is a non-empty value that cannot be
                converted to ``int``.
        """
        payload = {
            "model": model,
            "prompt": prompt,
            "prompt_optimizer": prompt_optimizer,
            "duration": int(duration) if duration not in (None, "") else None,
            "resolution": resolution,
            "first_frame_image": first_frame_image,
        }
        return self._request(
            "POST",
            f"{API_ENDPOINT}/video_generation",
            json={key: value for key, value in payload.items() if value is not None},
        )

    def file_retrieve(self, file_id: str) -> requests.Response:
        """GET the metadata of one uploaded or generated file from ``/files/retrieve``."""
        return self._request(
            "GET",
            f"{API_ENDPOINT}/files/retrieve",
            params={"GroupId": self.group_id, "file_id": file_id},
        )

    def file_list(self, purpose: str) -> requests.Response:
        """GET the files registered under ``purpose`` from ``/files/list``."""
        return self._request(
            "GET",
            f"{API_ENDPOINT}/files/list",
            params={"GroupId": self.group_id, "purpose": purpose},
        )
+51
View File
@@ -0,0 +1,51 @@
from collections.abc import Generator
from typing import Any
from dify_plugin import Tool
from dify_plugin.entities.tool import ToolInvokeMessage
from tools.base import MiniMaxBaseTool
class MiniMaxImageGenerationTool(Tool):
    """Dify tool that generates images from a text prompt through MiniMax."""

    @staticmethod
    def _json_dict(response) -> dict:
        """Return the response body as a dict, or ``{}`` if it is not a JSON object."""
        try:
            body = response.json()
        except ValueError:  # requests' JSONDecodeError is a ValueError subclass
            return {}
        return body if isinstance(body, dict) else {}

    def _invoke(self, tool_parameters: dict[str, Any]) -> Generator[ToolInvokeMessage]:
        """Request the images and yield one image message per returned URL.

        On any failure a single text message describing the problem is
        yielded instead. On success the image messages are followed by a JSON
        message of the form ``{"image_urls": [...]}``.
        """
        credentials = self.runtime.credentials
        minimax = MiniMaxBaseTool(
            api_key=credentials.get("api_key"),
            group_id=credentials.get("group_id"),
        )

        response = minimax.text_to_image(
            model=tool_parameters.get("model"),
            prompt=tool_parameters.get("prompt"),
            aspect_ratio=tool_parameters.get("aspect_ratio"),
            response_format="url",
            prompt_optimizer=tool_parameters.get("prompt_optimizer"),
            n=tool_parameters.get("n"),
        )
        if response.status_code != 200:
            yield self.create_text_message(
                f"Image generation failed {response.status_code} {response.text}"
            )
            return

        # Decode once; `or {}` also covers fields the API returns as null.
        body = self._json_dict(response)
        base_resp = body.get("base_resp") or {}
        if base_resp.get("status_code", -1) != 0:
            yield self.create_text_message(f"Image generation failed {response.text}")
            return

        image_urls = (body.get("data") or {}).get("image_urls")
        if not image_urls:
            yield self.create_text_message(f"Image generation failed {response.text}")
            return

        for image_url in image_urls:
            yield self.create_image_message(image_url)
        yield self.create_json_message({"image_urls": image_urls})
+102
View File
@@ -0,0 +1,102 @@
# Tool manifest for MiniMax image generation (implemented in tools/image_generation.py).
identity:
  name: image_generation
  author: quicksandzn
  label:
    en_US: Image Generation
    zh_Hans: 图片生成
description:
  human:
    en_US: Image Generation
    zh_Hans: 图片生成
  llm: Generate images using text prompt words
parameters:
  - name: model
    type: select
    required: true
    options:
      - value: image-01
        label:
          en_US: image-01
          zh_Hans: image-01
      - value: image-01-live
        label:
          en_US: image-01-live
          zh_Hans: image-01-live
    default: image-01
    label:
      en_US: Model Name
      zh_Hans: 模型名称
    human_description:
      en_US: Model Name
      zh_Hans: 模型名称
    form: form
  - name: prompt
    type: string
    required: true
    label:
      en_US: Prompt
      zh_Hans: 文本提示词
    human_description:
      en_US: Text description of the image to generate.
      zh_Hans: 生成图像的描述
    form: llm
  - name: aspect_ratio
    type: select
    required: false
    options:
      - value: "1:1"
        label:
          en_US: "1:1"
          zh_Hans: "1:1"
      - value: "16:9"
        label:
          en_US: "16:9"
          zh_Hans: "16:9"
      - value: "4:3"
        label:
          en_US: "4:3"
          zh_Hans: "4:3"
      - value: "3:2"
        label:
          en_US: "3:2"
          zh_Hans: "3:2"
      - value: "2:3"
        label:
          en_US: "2:3"
          zh_Hans: "2:3"
      - value: "3:4"
        label:
          en_US: "3:4"
          zh_Hans: "3:4"
      - value: "9:16"
        label:
          en_US: "9:16"
          zh_Hans: "9:16"
      - value: "21:9"
        label:
          en_US: "21:9"
          zh_Hans: "21:9"
    default: "1:1"
    label:
      en_US: Aspect Ratio
      zh_Hans: 宽高比
    human_description:
      en_US: Used to control the aspect ratio of the generated image
      zh_Hans: 用于控制生成图像的宽高比
    form: form
  # The tool implementation reads `prompt_optimizer`, so it is declared here.
  - name: prompt_optimizer
    type: boolean
    required: false
    default: false
    label:
      en_US: Prompt Optimizer
      zh_Hans: 提示词优化
    human_description:
      en_US: The model will automatically optimize the incoming prompt
      zh_Hans: 模型会自动优化传入的prompt
    form: form
  - name: n
    type: number
    required: false
    default: 1
    max: 9
    min: 1
    label:
      en_US: Number of generated
      zh_Hans: 生成数量
    human_description:
      en_US: Used to control the number of images generated in a single request
      zh_Hans: 用于控制单次请求生成的图片数量
    form: form
extra:
  python:
    source: tools/image_generation.py
+65
View File
@@ -0,0 +1,65 @@
from collections.abc import Generator
from typing import Any
from dify_plugin import Tool
from dify_plugin.entities.tool import ToolInvokeMessage
from tools.base import MiniMaxBaseTool
class MiniMaxMusicGenerationTool(Tool):
    """Dify tool that uploads a reference song and generates music from it."""

    @staticmethod
    def _json_dict(response) -> dict:
        """Return the response body as a dict, or ``{}`` if it is not a JSON object."""
        try:
            body = response.json()
        except ValueError:  # requests' JSONDecodeError is a ValueError subclass
            return {}
        return body if isinstance(body, dict) else {}

    def _invoke(self, tool_parameters: dict[str, Any]) -> Generator[ToolInvokeMessage]:
        """Upload the song, generate music, and yield the audio as a blob.

        Steps:
            1. Upload the ``song`` file; the upload response supplies
               ``voice_id`` and ``instrumental_id``.
            2. Request generation with those ids, the optional
               ``refer_vocal`` and the ``lyrics``.
            3. Decode the hex-encoded ``data.audio`` field and yield it with
               mime type ``audio/mpeg``.

        On any failure a single text message describing the problem is
        yielded instead.
        """
        credentials = self.runtime.credentials
        minimax = MiniMaxBaseTool(
            api_key=credentials.get("api_key"),
            group_id=credentials.get("group_id"),
        )

        song_file = tool_parameters.get("song")
        if not song_file:
            # Without this guard the attribute access below raises AttributeError.
            yield self.create_text_message(
                "Music generation failed: song file is required"
            )
            return

        upload_response = minimax.music_upload(
            file_name=song_file.filename,
            file_blob=song_file.blob,
            mime_type=song_file.mime_type,
        )
        if upload_response.status_code != 200:
            yield self.create_text_message(
                f"Music generation upload failed {upload_response.status_code} {upload_response.text}"
            )
            return

        upload_data = self._json_dict(upload_response)
        voice_id = upload_data.get("voice_id")
        instrumental_id = upload_data.get("instrumental_id")
        if not voice_id or not instrumental_id:
            yield self.create_text_message(
                f"Music generation upload failed {upload_response.text}"
            )
            return

        gen_response = minimax.music_generation(
            model=tool_parameters.get("model"),
            refer_voice=voice_id,
            refer_instrumental=instrumental_id,
            # An empty string is normalised to None.
            refer_vocal=tool_parameters.get("refer_vocal") or None,
            lyrics=tool_parameters.get("lyrics"),
        )
        if gen_response.status_code != 200:
            yield self.create_text_message(
                f"Music generation failed {gen_response.status_code} {gen_response.text}"
            )
            return

        # Decode once; `or {}` also covers fields the API returns as null.
        gen_data = self._json_dict(gen_response)
        base_resp = gen_data.get("base_resp") or {}
        if base_resp.get("status_code", -1) != 0:
            yield self.create_text_message(
                f"Music generation failed {gen_response.text}"
            )
            return

        audio_hex = (gen_data.get("data") or {}).get("audio")
        if not audio_hex:
            yield self.create_text_message(
                f"Music generation failed {gen_response.text}"
            )
            return

        try:
            audio_bytes = bytes.fromhex(audio_hex)
        except (TypeError, ValueError):
            yield self.create_text_message(
                "Music generation failed: audio payload is not valid hex"
            )
            return

        # This message used to be built inside a discarded tuple expression and
        # was therefore never sent; it is now actually yielded.
        yield self.create_text_message("Audio generated successfully")
        yield self.create_blob_message(
            blob=audio_bytes, meta={"mime_type": "audio/mpeg"}
        )
+64
View File
@@ -0,0 +1,64 @@
# Tool manifest for MiniMax music generation (implemented in tools/music_generation.py).
identity:
  name: music_generation
  author: quicksandzn
  label:
    en_US: Music Generation
    zh_Hans: 音乐生成
description:
  human:
    en_US: Music Generation
    zh_Hans: 音乐生成
  llm: Music Generation
parameters:
  - name: model
    type: select
    required: true
    options:
      - value: music-01
        label:
          en_US: music-01
          zh_Hans: music-01
    default: music-01
    label:
      en_US: Model Name
      zh_Hans: 模型名称
    human_description:
      en_US: Model Name
      zh_Hans: 模型名称
    form: form
  - name: song
    type: file
    required: true
    label:
      en_US: Song File
      zh_Hans: 歌曲文件
    human_description:
      en_US: Song File
      zh_Hans: 歌曲文件
    form: llm
  - name: refer_vocal
    type: string
    required: false
    label:
      en_US: Voice ID
      zh_Hans: 声音ID
    human_description:
      en_US: The sound ID used to replace the generated music timbre when generating music
      zh_Hans: 生成音乐时用来替换生成音乐音色的声音ID
    form: form
  - name: lyrics
    type: string
    required: false
    label:
      en_US: Lyrics
      zh_Hans: 歌词
    # Backslashes are doubled so the literal text "\n" is shown; a bare \n in a
    # double-quoted YAML string is an actual line break.
    help:
      en_US: "Lyrics: Use line breaks (\\n) to separate each line of lyrics. Use two consecutive line breaks (\\n\\n) to add pauses in the middle of the lyrics. Use double hash marks (##) at the beginning and end to add accompaniment. Supports up to 200 characters (each Chinese character, punctuation mark, and letter counts as one character)."
      zh_Hans: "歌词,使用换行符(\\n)分隔每行歌词,使用两个连续换行符(\\n\\n)可以在歌词中间添加停顿,使用双井号(##)添加在首尾可以添加伴奏,支持最长200字符(每个汉字、标点和字母都算1个字符)。"
    human_description:
      en_US: Lyrics
      zh_Hans: 歌词
    form: llm
extra:
  python:
    source: tools/music_generation.py
+103
View File
@@ -0,0 +1,103 @@
import time
from collections.abc import Generator
from typing import Any
from dify_plugin import Tool
from dify_plugin.entities.tool import ToolInvokeMessage
from tools.base import MiniMaxBaseTool
import logging
from dify_plugin.config.logger_format import plugin_logger_handler
# Module logger: records go through the plugin's log handler, and anything
# below INFO is filtered out at the logger.
logger = logging.getLogger(__name__)
logger.addHandler(plugin_logger_handler)
logger.setLevel(logging.INFO)
class MiniMaxVideoGenerationTool(Tool):
    """Dify tool that submits a MiniMax video generation task and waits for it."""

    # Polling schedule for the asynchronous task: at most 100 polls, 5 s apart.
    _MAX_POLLS = 100
    _POLL_INTERVAL_SECONDS = 5

    # Terminal failure states, compared case-insensitively. The MiniMax query
    # API documents the failure state as "Fail"; the previously matched
    # "failed" is kept as well so that spelling still ends the polling loop.
    _FAILURE_STATUSES = frozenset({"fail", "failed"})

    @staticmethod
    def _json_dict(response) -> dict:
        """Return the response body as a dict, or ``{}`` if it is not a JSON object."""
        try:
            body = response.json()
        except ValueError:  # requests' JSONDecodeError is a ValueError subclass
            return {}
        return body if isinstance(body, dict) else {}

    def _invoke(self, tool_parameters: dict[str, Any]) -> Generator[ToolInvokeMessage]:
        """Create the task, poll until it finishes, then yield the video URL.

        Steps:
            1. Submit the generation request and report the task id.
            2. Poll the task until it succeeds, fails, returns an error, or
               the poll budget is used up.
            3. Look up the resulting file and yield its ``download_url`` as an
               image message followed by ``{"video_url": ...}`` as JSON.

        Every failure path yields a text message describing the problem.
        """
        credentials = self.runtime.credentials
        minimax = MiniMaxBaseTool(
            api_key=credentials.get("api_key"),
            group_id=credentials.get("group_id"),
        )

        response = minimax.video_generation(
            model=tool_parameters.get("model"),
            prompt=tool_parameters.get("prompt"),
            prompt_optimizer=tool_parameters.get("prompt_optimizer"),
            duration=tool_parameters.get("duration"),
            resolution=tool_parameters.get("resolution"),
            # An empty string is normalised to None.
            first_frame_image=tool_parameters.get("first_frame_image") or None,
        )
        if response.status_code != 200:
            yield self.create_text_message(
                f"Video generation failed {response.status_code} {response.text}"
            )
            return

        task_id = self._json_dict(response).get("task_id")
        if not task_id:
            yield self.create_text_message(f"Video generation failed {response.text}")
            return
        yield self.create_text_message(f"Video generation task id {task_id}")

        video_file_id = None
        for _ in range(self._MAX_POLLS):
            time.sleep(self._POLL_INTERVAL_SECONDS)
            task_response = minimax.video_generation_task(task_id=task_id)
            if task_response.status_code != 200:
                yield self.create_text_message(
                    f"Video generation task failed {task_response.status_code} {task_response.text}"
                )
                break

            task_json = self._json_dict(task_response)
            base_resp = task_json.get("base_resp") or {}
            if base_resp.get("status_code", -1) != 0:
                yield self.create_text_message(
                    f"Video generation task failed {task_response.text}"
                )
                break

            task_status = str(task_json.get("status") or "").lower()
            if task_status == "success":
                video_file_id = task_json.get("file_id")
                yield self.create_text_message("Video generation task status Success")
                break
            if task_status in self._FAILURE_STATUSES:
                # Stop right away instead of polling a dead task until the
                # budget is exhausted.
                yield self.create_text_message(
                    f"Video generation task status failed {task_response.text}"
                )
                break
            # Any other state (preparing / queueing / processing): keep polling.
            logger.debug("Video generation task status %s", task_status)

        if not video_file_id:
            yield self.create_text_message("Video generation failed")
            return

        file_response = minimax.file_retrieve(file_id=video_file_id)
        if file_response.status_code != 200:
            yield self.create_text_message(
                f"Video generation get file failed {file_response.status_code} {file_response.text}"
            )
            return

        file_info = self._json_dict(file_response).get("file") or {}
        video_url = file_info.get("download_url")
        if not video_url:
            yield self.create_text_message(
                f"Video generation get file failed {file_response.text}"
            )
            return

        yield self.create_image_message(video_url)
        yield self.create_json_message({"video_url": video_url})
+126
View File
@@ -0,0 +1,126 @@
# Tool manifest for MiniMax video generation (implemented in tools/video_generation.py).
identity:
  name: video_generation
  author: quicksandzn
  label:
    en_US: Video Generation
    zh_Hans: 视频生成
description:
  human:
    en_US: Video Generation
    zh_Hans: 视频生成
  llm: Video Generation
parameters:
  - name: model
    type: select
    required: true
    options:
      - value: MiniMax-Hailuo-02
        label:
          en_US: MiniMax-Hailuo-02
          zh_Hans: MiniMax-Hailuo-02
      - value: T2V-01-Director
        label:
          en_US: T2V-01-Director
          zh_Hans: T2V-01-Director
      - value: I2V-01-Director
        label:
          en_US: I2V-01-Director
          zh_Hans: I2V-01-Director
      - value: S2V-01
        label:
          en_US: S2V-01
          zh_Hans: S2V-01
      - value: I2V-01-live
        label:
          en_US: I2V-01-live
          zh_Hans: I2V-01-live
      - value: I2V-01
        label:
          en_US: I2V-01
          zh_Hans: I2V-01
      - value: T2V-01
        label:
          en_US: T2V-01
          zh_Hans: T2V-01
    default: MiniMax-Hailuo-02
    label:
      en_US: Model Name
      zh_Hans: 模型名称
    human_description:
      en_US: Model Name
      zh_Hans: 模型名称
    form: form
  - name: prompt
    type: string
    required: false
    label:
      en_US: Prompt
      zh_Hans: 文本提示词
    human_description:
      en_US: Text description of the video to generate.
      zh_Hans: 生成视频的描述
    form: llm
  - name: prompt_optimizer
    type: boolean
    required: false
    default: true
    label:
      en_US: Prompt Optimizer
      zh_Hans: 提示词优化
    human_description:
      en_US: The model will automatically optimize the incoming prompt
      zh_Hans: 模型会自动优化传入的prompt
    form: form
  - name: duration
    type: select
    required: true
    options:
      - value: "6"
        label:
          en_US: "6"
          zh_Hans: "6"
      - value: "10"
        label:
          en_US: "10"
          zh_Hans: "10"
    default: "6"
    label:
      en_US: Video Duration
      zh_Hans: 视频时长
    human_description:
      en_US: Generated video duration
      zh_Hans: 生成视频时长
    form: form
  - name: resolution
    type: select
    required: true
    options:
      - value: 768P
        label:
          en_US: 768P
          zh_Hans: 768P
      - value: 1080P
        label:
          en_US: 1080P
          zh_Hans: 1080P
    default: 768P
    label:
      en_US: Resolution
      zh_Hans: 分辨率
    human_description:
      en_US: Resolution
      zh_Hans: 分辨率
    form: form
  - name: first_frame_image
    type: string
    required: false
    label:
      en_US: First Frame Image
      zh_Hans: 首帧画面
    human_description:
      en_US: The model will generate a video based on the image passed in this parameter as the first frame
      zh_Hans: 模型将以此参数中传入的图片为首帧画面来生成视频
    form: form
extra:
  python:
    source: tools/video_generation.py
+77
View File
@@ -0,0 +1,77 @@
import uuid
from collections.abc import Generator
from typing import Any
from dify_plugin import Tool
from dify_plugin.entities.tool import ToolInvokeMessage
from tools.base import MiniMaxBaseTool
class MiniMaxVoiceCloneTool(Tool):
    """Dify tool that uploads a reference recording and clones its voice."""

    @staticmethod
    def _json_dict(response) -> dict:
        """Return the response body as a dict, or ``{}`` if it is not a JSON object."""
        try:
            body = response.json()
        except ValueError:  # requests' JSONDecodeError is a ValueError subclass
            return {}
        return body if isinstance(body, dict) else {}

    def _invoke(self, tool_parameters: dict[str, Any]) -> Generator[ToolInvokeMessage]:
        """Upload the reference voice, clone it, and report the resulting ids.

        Steps:
            1. Upload ``ref_voice`` with purpose ``voice_clone`` to obtain a
               ``file_id``.
            2. Request the clone under ``voice_id`` (a ``voice_<uuid4>`` id is
               generated when none is supplied).
            3. Yield the ``demo_audio`` value as text when the response has
               one, then a JSON message with ``voice_id`` and ``demo_audio``.

        On any failure a single text message describing the problem is
        yielded instead.
        """
        credentials = self.runtime.credentials
        minimax = MiniMaxBaseTool(
            api_key=credentials.get("api_key"),
            group_id=credentials.get("group_id"),
        )

        ref_voice = tool_parameters.get("ref_voice")
        if not ref_voice:
            # Without this guard the attribute access below raises AttributeError.
            yield self.create_text_message(
                "Voice clone failed: reference voice file is required"
            )
            return

        voice_id = tool_parameters.get("voice_id") or f"voice_{uuid.uuid4()}"
        accuracy = tool_parameters.get("accuracy")
        if accuracy is None:
            accuracy = 0.7

        upload_response = minimax.file_upload(
            file_name=ref_voice.filename,
            file_blob=ref_voice.blob,
            mime_type=ref_voice.mime_type,
            purpose="voice_clone",
        )
        if upload_response.status_code != 200:
            yield self.create_text_message(
                f"Voice clone upload failed {upload_response.status_code} {upload_response.text}"
            )
            return

        # `or {}` also covers a "file" field that the API returns as null.
        upload_data = self._json_dict(upload_response).get("file") or {}
        file_id = upload_data.get("file_id")
        if not file_id:
            yield self.create_text_message(
                f"Voice clone upload failed {upload_response.text}"
            )
            return

        clone_response = minimax.voice_clone(
            model=tool_parameters.get("model"),
            file_id=file_id,
            voice_id=voice_id,
            text=tool_parameters.get("text"),
            accuracy=accuracy,
            need_noise_reduction=tool_parameters.get("need_noise_reduction", False),
            need_volume_normalization=tool_parameters.get(
                "need_volume_normalization", False
            ),
        )
        if clone_response.status_code != 200:
            yield self.create_text_message(
                f"Voice clone failed {clone_response.status_code} {clone_response.text}"
            )
            return

        clone_data = self._json_dict(clone_response)
        base_resp = clone_data.get("base_resp") or {}
        if base_resp.get("status_code", -1) != 0:
            yield self.create_text_message(f"Voice clone failed {clone_response.text}")
            return

        demo_audio = clone_data.get("demo_audio")
        if demo_audio:
            yield self.create_text_message(demo_audio)

        yield self.create_json_message(
            {"voice_id": voice_id, "demo_audio": demo_audio}
        )
+109
View File
@@ -0,0 +1,109 @@
# Tool manifest for MiniMax voice cloning (implemented in tools/voice_clone.py).
identity:
  name: voice_clone
  author: quicksandzn
  label:
    en_US: Voice Clone
    zh_Hans: 声音克隆
description:
  human:
    en_US: Voice Clone
    zh_Hans: 声音克隆
  llm: Voice Clone
parameters:
  - name: text
    type: string
    required: false
    label:
      en_US: Text
      zh_Hans: 试听文本
    human_description:
      en_US: Text
      zh_Hans: 试听文本
    form: llm
  - name: model
    type: select
    required: true
    options:
      - value: speech-02-hd
        label:
          en_US: speech-02-hd
          zh_Hans: speech-02-hd
      - value: speech-02-turbo
        label:
          en_US: speech-02-turbo
          zh_Hans: speech-02-turbo
      - value: speech-01-hd
        label:
          en_US: speech-01-hd
          zh_Hans: speech-01-hd
      - value: speech-01-turbo
        label:
          en_US: speech-01-turbo
          zh_Hans: speech-01-turbo
    default: speech-02-hd
    label:
      en_US: Model Name
      zh_Hans: 模型名称
    human_description:
      en_US: Model Name
      zh_Hans: 模型名称
    form: form
  - name: ref_voice
    type: file
    required: true
    label:
      en_US: Reference Voice File
      zh_Hans: 参考声音文件
    human_description:
      en_US: Reference Voice File
      zh_Hans: 参考声音文件
    form: llm
  - name: voice_id
    type: string
    required: false
    label:
      en_US: Voice ID Customize
      zh_Hans: 声音ID (自定义)
    human_description:
      en_US: Voice ID, It will be automatically generated if it is empty
      zh_Hans: 声音ID,如果为空自动生成
    form: form
  - name: accuracy
    type: number
    required: false
    min: 0.1
    max: 1
    default: 0.7
    label:
      en_US: Accuracy threshold
      zh_Hans: 文本校验准确率阈值
    human_description:
      en_US: Accuracy threshold
      zh_Hans: 文本校验准确率阈值
    form: form
  - name: need_noise_reduction
    type: boolean
    required: false
    default: false
    label:
      en_US: Noise reduction
      zh_Hans: 是否开启降噪
    human_description:
      en_US: Noise reduction
      zh_Hans: 是否开启降噪
    form: form
  - name: need_volume_normalization
    type: boolean
    required: false
    default: false
    label:
      en_US: Volume normalization
      zh_Hans: 是否开启音量归一化
    human_description:
      en_US: Volume normalization
      zh_Hans: 是否开启音量归一化
    form: form
extra:
  python:
    source: tools/voice_clone.py