Arphd4 committed on
Commit e24351b · 1 Parent(s): 9f6025c

Full ARK-AI multi-modal setup

Files changed (1)
  1. app.py +78 -125
app.py CHANGED
@@ -1,126 +1,79 @@
- # app.py
- import gradio as gr
- from transformers import pipeline
-
- # =============================
- # LOAD YOUR MODELS
- # =============================
-
- # ----- Text models -----
- text_models = [
-     pipeline("text-generation", model="openai/gpt-oss-120b"),
-     pipeline("text-generation", model="deepseek-ai/DeepSeek-V3.1-Base"),
-     pipeline("text-generation", model="zai-org/GLM-4.5"),
-     pipeline("text-generation", model="zai-org/chatglm3-6b"),
-     pipeline("text-generation", model="openai/gpt-oss-20b")
- ]
-
- # ----- Audio models -----
- tts_models = [
-     pipeline("text-to-speech", model="sk0032/coqui-tts-model"),
-     pipeline("text-to-speech", model="sysf/Edge-TTS")
- ]
-
- # ----- Whisper ASR -----
- speech_to_text_model = pipeline("automatic-speech-recognition", model="openai/whisper-large-v3-turbo")
-
- # ----- Image models -----
- image_models = [
-     pipeline("text-to-image", model="GD-ML/USP-Image_Generation"),
-     pipeline("text-to-image", model="Qwen/Qwen-Image"),
-     pipeline("text-to-image", model="Comfy-Org/Qwen-Image-DiffSynth-ControlNets"),
-     pipeline("image-to-text", model="Salesforce/blip2-opt-2.7b"),
-     pipeline("vision-to-text", model="Vision-CAIR/MiniGPT-4")
- ]
-
- # ----- Video models -----
- video_models = [
-     pipeline("text-to-video", model="sudip1987/Generate_videos_with_Veo3"),
-     pipeline("text-to-video", model="ali-vilab/text-to-video-ms-1.7b")
- ]
-
- # ----- Music model -----
- music_model = pipeline("text-to-music", model="sander-wood/text-to-music")
-
- # =============================
- # HELPER FUNCTIONS
- # =============================
-
- def merge_text_models(prompt):
-     responses = []
-     for model in text_models:
-         try:
-             out = model(prompt)[0]['generated_text']
-             responses.append(out)
-         except Exception as e:
-             responses.append(f"[Model failed: {str(e)}]")
-     return "\n---\n".join(responses)
-
- def generate_audio(text):
-     audio_files = []
-     for model in tts_models:
-         try:
-             audio_path = model(text)
-             audio_files.append(audio_path)
-         except Exception as e:
-             audio_files.append(None)
-     return audio_files[0] if audio_files else None
-
- def generate_images(text):
-     imgs = []
-     for model in image_models:
-         try:
-             img = model(text)[0]['image']
-             imgs.append(img)
-         except Exception as e:
-             continue
-     return imgs[:3]  # Show top 3 images
-
- def generate_videos(text):
-     vids = []
-     for model in video_models:
-         try:
-             vid = model(text)
-             vids.append(vid)
-         except Exception as e:
-             continue
-     return vids[:1]  # Show one video
-
- # =============================
- # MAIN ARK-AI FUNCTION
- # =============================
-
- def ark_ai_main(prompt):
-     # Text
-     text_output = merge_text_models(prompt)
-
-     # Inject personality
-     personality = "ARK-AI (fun, savage, chaotic-good) says:\n"
-     full_text = personality + text_output
-
-     # Media
-     image_output = generate_images(prompt)
-     video_output = generate_videos(prompt)
-     audio_output = generate_audio(prompt)
-
-     return full_text, image_output, video_output, audio_output
-
- # =============================
- # GRADIO INTERFACE
- # =============================
-
- iface = gr.Interface(
-     fn=ark_ai_main,
-     inputs=gr.Textbox(lines=3, placeholder="Ask ARK-AI anything..."),
-     outputs=[
-         gr.Textbox(label="ARK-AI Text Response"),
-         gr.Gallery(label="Images Generated"),
-         gr.Video(label="Video Generated"),
-         gr.Audio(label="Audio Response")
-     ],
-     title="ARK-AI Multi-Modal Assistant",
-     description="ARK-AI: Savage, funny, chaotic-good AI assistant merging text, image, audio, and video models.",
-     css="styles.css"  # Optional: liquid-glass UI
- )
-
- iface.launch()
 
+ # Backend.py
+ from flask import Flask, request, jsonify
+ from flask_cors import CORS
+ import torch
+ from transformers import AutoModelForCausalLM, AutoTokenizer
+
+ app = Flask(__name__)
+ CORS(app)
+
+ # List of models
+ MODEL_PATHS = {
+     "gpt_oss_120b": "openai/gpt-oss-120b",
+     "deepseek_v3": "deepseek-ai/DeepSeek-V3.1-Base",
+     "gemini_25_pro": "afu4642tD/gemini-2.5-pro",
+     "veo3": "sudip1987/Generate_videos_with_Veo3",
+     "open_sora": "hpcai-tech/Open-Sora-v2",
+     "usp_image": "GD-ML/USP-Image_Generation",
+     "text_to_music": "sander-wood/text-to-music",
+     "qwen_image": "Qwen/Qwen-Image",
+     "qwen_image_diff": "Comfy-Org/Qwen-Image-DiffSynth-ControlNets",
+     "coqui_tts": "sk0032/coqui-tts-model",
+     "edge_tts": "sysf/Edge-TTS",
+     "whisper_large": "openai/whisper-large-v3-turbo",
+     "blip2_opt": "Salesforce/blip2-opt-2.7b",
+     "mini_gpt4": "Vision-CAIR/MiniGPT-4",
+     "glm_45": "zai-org/GLM-4.5",
+     "chatglm3": "zai-org/chatglm3-6b",
+     "gpt_oss_20b": "openai/gpt-oss-20b",
+     "m2m100": "facebook/m2m100_1.2B",
+     "tiny_marian": "onnx-community/tiny-random-MarianMTModel",
+     "memory_transformer": "Grpp/memory-transformer-ru",
+     "rl_memory_agent": "BytedTsinghua-SIA/RL-MemoryAgent-14B",
+     "m3_agent": "ByteDance-Seed/M3-Agent-Memorization",
+     "text_to_video": "ali-vilab/text-to-video-ms-1.7b"
+ }
+
+ def generate_single_answer(prompt, model_key):
+     """Load model, generate answer, free memory"""
+     model_name = MODEL_PATHS[model_key]
+     tokenizer = AutoTokenizer.from_pretrained(model_name)
+     model = AutoModelForCausalLM.from_pretrained(
+         model_name,
+         device_map="auto" if torch.cuda.is_available() else None,
+         torch_dtype=torch.bfloat16 if torch.cuda.is_available() else torch.float32
+     )
+
+     inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
+     with torch.no_grad():
+         output = model.generate(**inputs, max_new_tokens=200)
+     answer = tokenizer.decode(output[0], skip_special_tokens=True)
+
+     # Clean up to save memory
+     del model, tokenizer, inputs, output
+     torch.cuda.empty_cache()
+
+     return answer
+
+ @app.route("/ask", methods=["POST"])
+ def ask():
+     data = request.get_json(silent=True) or {}
+     prompt = data.get("prompt", "")
+     selected_models = data.get("models", ["gpt_oss_120b", "deepseek_v3", "gemini_25_pro"])
+
+     # Merge answers from all selected models
+     answers = []
+     for model_key in selected_models:
+         if model_key in MODEL_PATHS:
+             try:
+                 ans = generate_single_answer(prompt, model_key)
+                 answers.append(ans)
+             except Exception as e:
+                 answers.append(f"[Error loading {model_key}: {e}]")
+
+     # Return one merged answer
+     final_answer = " | ".join(answers)
+     return jsonify({"answer": final_answer})
+
+ if __name__ == "__main__":
+     app.run(host="0.0.0.0", port=5000, debug=True)
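
Since this commit replaces the Gradio UI with a bare Flask API, the quickest smoke test is a small HTTP client. The script below is a minimal sketch, not part of the commit: it assumes the backend is running locally on the host/port from app.run above, and that the "models" values are keys of MODEL_PATHS. Because generate_single_answer reloads each checkpoint on every request, the first response can take minutes.

import requests

# Hypothetical test client for the /ask endpoint added in this commit.
resp = requests.post(
    "http://localhost:5000/ask",
    json={
        "prompt": "Introduce yourself, ARK-AI.",
        "models": ["gpt_oss_20b", "chatglm3"],  # any keys from MODEL_PATHS
    },
    timeout=600,  # per-request model loading is slow; allow a generous timeout
)
resp.raise_for_status()
print(resp.json()["answer"])

Loading and deleting the model inside generate_single_answer keeps peak memory low at the cost of latency; a cache keyed by model_key would trade memory for speed.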