openfree committed on
Commit
aa7324d
·
verified ·
1 Parent(s): a217571

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +134 -2
app.py CHANGED
@@ -1,6 +1,6 @@
1
  """
2
  DNA-Diffusion Gradio Application
3
- Interactive DNA sequence generation with slot machine visualization
4
  """
5
 
6
  import gradio as gr
@@ -9,6 +9,8 @@ import json
9
  import os
10
  from typing import Dict, Any, Tuple
11
  import html
 
 
12
 
13
  # Configure logging
14
  logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
@@ -45,6 +47,115 @@ if not os.path.exists(HTML_FILE):
45
  with open(HTML_FILE, "r") as f:
46
  SLOT_MACHINE_HTML = f.read()
47
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
48
  class DNADiffusionApp:
49
  """Main application class for DNA-Diffusion Gradio interface"""
50
 
@@ -52,6 +163,7 @@ class DNADiffusionApp:
52
  self.model = None
53
  self.model_loading = False
54
  self.model_error = None
 
55
 
56
  def initialize_model(self):
57
  """Initialize the DNA-Diffusion model"""
@@ -91,7 +203,6 @@ class DNADiffusionApp:
91
  'mock': True
92
  }
93
  # Simulate generation time
94
- import time
95
  time.sleep(2.0)
96
  return sequence, metadata
97
 
@@ -107,7 +218,28 @@ class DNADiffusionApp:
107
  """Handle sequence generation request from Gradio"""
108
  try:
109
  logger.info(f"Generating sequence for cell type: {cell_type}")
 
 
110
  sequence, metadata = self.generate_sequence(cell_type, guidance_scale)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
111
  return sequence, json.dumps(metadata)
112
 
113
  except Exception as e:
 
1
  """
2
  DNA-Diffusion Gradio Application
3
+ Interactive DNA sequence generation with slot machine visualization and protein analysis
4
  """
5
 
6
  import gradio as gr
 
9
  import os
10
  from typing import Dict, Any, Tuple
11
  import html
12
+ import requests
13
+ import time
14
 
15
  # Configure logging
16
  logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
 
47
  with open(HTML_FILE, "r") as f:
48
  SLOT_MACHINE_HTML = f.read()
49
 
50
class ProteinAnalyzer:
    """Translate DNA into a protein sequence and request an LLM analysis of it.

    Both operations are static methods; the class keeps no per-instance state.
    """

    # Chat-completions endpoint, deployment id and request timeout (seconds)
    # used by analyze_protein_with_llm.
    API_URL = "https://api.friendli.ai/dedicated/v1/chat/completions"
    MODEL_ID = "dep89a2fld32mcm"
    REQUEST_TIMEOUT = 30

    # Genetic code table: DNA codon -> one-letter amino acid ('*' marks a stop codon)
    CODON_TABLE = {
        'TTT': 'F', 'TTC': 'F', 'TTA': 'L', 'TTG': 'L',
        'TCT': 'S', 'TCC': 'S', 'TCA': 'S', 'TCG': 'S',
        'TAT': 'Y', 'TAC': 'Y', 'TAA': '*', 'TAG': '*',
        'TGT': 'C', 'TGC': 'C', 'TGA': '*', 'TGG': 'W',
        'CTT': 'L', 'CTC': 'L', 'CTA': 'L', 'CTG': 'L',
        'CCT': 'P', 'CCC': 'P', 'CCA': 'P', 'CCG': 'P',
        'CAT': 'H', 'CAC': 'H', 'CAA': 'Q', 'CAG': 'Q',
        'CGT': 'R', 'CGC': 'R', 'CGA': 'R', 'CGG': 'R',
        'ATT': 'I', 'ATC': 'I', 'ATA': 'I', 'ATG': 'M',
        'ACT': 'T', 'ACC': 'T', 'ACA': 'T', 'ACG': 'T',
        'AAT': 'N', 'AAC': 'N', 'AAA': 'K', 'AAG': 'K',
        'AGT': 'S', 'AGC': 'S', 'AGA': 'R', 'AGG': 'R',
        'GTT': 'V', 'GTC': 'V', 'GTA': 'V', 'GTG': 'V',
        'GCT': 'A', 'GCC': 'A', 'GCA': 'A', 'GCG': 'A',
        'GAT': 'D', 'GAC': 'D', 'GAA': 'E', 'GAG': 'E',
        'GGT': 'G', 'GGC': 'G', 'GGA': 'G', 'GGG': 'G',
    }

    @staticmethod
    def dna_to_protein(dna_sequence: str) -> str:
        """Translate a DNA sequence to a protein sequence.

        The input is upper-cased and every character other than A, T, C, G is
        dropped before translation. Codons are read from the first remaining
        base in steps of three; translation stops at the first stop codon and
        any trailing partial codon is ignored.

        Args:
            dna_sequence: DNA string; may contain lowercase or non-DNA characters.

        Returns:
            One-letter amino acid string, possibly empty.
        """
        # Nothing to translate (also guards against a None sequence).
        if not dna_sequence:
            return ""

        bases = ''.join(c for c in dna_sequence.upper() if c in 'ATCG')

        protein = []
        # The upper bound len - 2 guarantees every slice is a full 3-base codon.
        for start in range(0, len(bases) - 2, 3):
            amino_acid = ProteinAnalyzer.CODON_TABLE.get(bases[start:start + 3], 'X')
            if amino_acid == '*':  # Stop codon
                break
            protein.append(amino_acid)

        return ''.join(protein)

    @staticmethod
    def analyze_protein_with_llm(protein_sequence: str, cell_type: str) -> str:
        """Analyze protein structure and function using the Friendli LLM API.

        Args:
            protein_sequence: One-letter amino acid sequence to analyze.
            cell_type: Cell type name inserted into the prompt as context.

        Returns:
            The model's analysis text, or a human-readable "unavailable"/"failed"
            message. This method does not raise: request and parsing errors are
            logged and reported through the returned string.
        """
        # An empty translation (e.g. the DNA starts with a stop codon) gives the
        # model nothing to analyze, so skip the network call entirely.
        if not protein_sequence:
            return "Protein analysis unavailable: no protein sequence to analyze"

        # Get API token from environment
        token = os.getenv("FRIENDLI_TOKEN")
        if not token:
            logger.warning("FRIENDLI_TOKEN not found in environment variables")
            return "Protein analysis unavailable: API token not configured"

        headers = {
            "Authorization": f"Bearer {token}",
            "Content-Type": "application/json"
        }

        # Create prompt for protein analysis
        prompt = f"""You are a bioinformatics expert. Analyze the following protein sequence and provide insights about its potential structure and function.

Protein sequence: {protein_sequence}
Cell type context: {cell_type}

Please provide:
1. Predicted protein family or domain based on sequence patterns
2. Potential structural features (alpha helices, beta sheets, loops)
3. Possible biological functions
4. Relevance to the {cell_type} cell type
5. Any notable sequence motifs or characteristics

Keep the response concise but informative, suitable for display in a scientific application."""

        payload = {
            "model": ProteinAnalyzer.MODEL_ID,
            "messages": [
                {
                    "role": "system",
                    "content": "You are a knowledgeable bioinformatics assistant specializing in protein structure and function prediction."
                },
                {
                    "role": "user",
                    "content": prompt
                }
            ],
            "max_tokens": 1000,
            "temperature": 0.7,
            "top_p": 0.8,
            "stream": False  # Disable streaming for simplicity
        }

        try:
            response = requests.post(
                ProteinAnalyzer.API_URL,
                json=payload,
                headers=headers,
                timeout=ProteinAnalyzer.REQUEST_TIMEOUT,
            )
            response.raise_for_status()

            result = response.json()
            return result['choices'][0]['message']['content']

        except requests.exceptions.RequestException as e:
            logger.error("Failed to analyze protein with LLM: %s", e)
            return f"Protein analysis failed: {str(e)}"
        except Exception as e:
            # Typically a response body that lacks the expected
            # choices/message/content structure; keep the traceback in the log.
            logger.exception("Unexpected error during protein analysis: %s", e)
            return "Protein analysis unavailable due to an error"
158
+
159
  class DNADiffusionApp:
160
  """Main application class for DNA-Diffusion Gradio interface"""
161
 
 
163
  self.model = None
164
  self.model_loading = False
165
  self.model_error = None
166
+ self.protein_analyzer = ProteinAnalyzer()
167
 
168
  def initialize_model(self):
169
  """Initialize the DNA-Diffusion model"""
 
203
  'mock': True
204
  }
205
  # Simulate generation time
 
206
  time.sleep(2.0)
207
  return sequence, metadata
208
 
 
218
  """Handle sequence generation request from Gradio"""
219
  try:
220
  logger.info(f"Generating sequence for cell type: {cell_type}")
221
+
222
+ # Generate DNA sequence
223
  sequence, metadata = self.generate_sequence(cell_type, guidance_scale)
224
+
225
+ # Translate to protein
226
+ logger.info("Translating DNA to protein sequence...")
227
+ protein_sequence = self.protein_analyzer.dna_to_protein(sequence)
228
+
229
+ # Add protein sequence to metadata
230
+ metadata['protein_sequence'] = protein_sequence
231
+ metadata['protein_length'] = len(protein_sequence)
232
+
233
+ # Analyze protein with LLM
234
+ logger.info("Analyzing protein structure and function...")
235
+ protein_analysis = self.protein_analyzer.analyze_protein_with_llm(
236
+ protein_sequence, cell_type
237
+ )
238
+
239
+ # Add analysis to metadata
240
+ metadata['protein_analysis'] = protein_analysis
241
+
242
+ logger.info("Generation and analysis complete")
243
  return sequence, json.dumps(metadata)
244
 
245
  except Exception as e: