Spaces:

abdullah63
/

CPP-to-Pseudocode

Sleeping

App Files Files Community

abdullah63 committed on Mar 14

Commit

332b30f

verified ·

1 Parent(s): 2572242

Update app.py

Browse files

Files changed (1) hide show

app.py +47 -24

app.py CHANGED Viewed

@@ -134,39 +134,62 @@ class Transformer(nn.Module):
 # Set device
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 # Load tokenizers
-sp_pseudo = spm.SentencePieceProcessor(model_file="pseudo.model")  # For decoding pseudocode (target)
-sp_code = spm.SentencePieceProcessor(model_file="code.model")      # For encoding C++ (source)
-# Load the full saved model (architecture + weights)
-model_path = "transformer_cpp_to_pseudo_30.pth"  # Adjust path to your C++ to pseudocode model
-model = torch.load(model_path, map_location=device, weights_only=False)
-model.eval()
-model = model.to(device)
 def generate_pseudocode(cpp_code, max_len):
     """Generate pseudocode from C++ code with streaming output."""
     model.eval()
-    src = torch.tensor([sp_code.encode_as_ids(cpp_code)], dtype=torch.long, device=device)  # Tokenize C++ code
-    tgt = torch.tensor([[2]], dtype=torch.long, device=device)  # <bos_id>=2
-    generated_tokens = [2]  # Start with <START>
-    response = ""
-    with torch.no_grad():
-        for _ in range(max_len):
-            output = model(src, tgt)
-            next_token = output[:, -1, :].argmax(-1).item()
-            generated_tokens.append(next_token)
-            tgt = torch.cat([tgt, torch.tensor([[next_token]], device=device)], dim=1)
-            response = sp_pseudo.decode_ids(generated_tokens)  # Decode to pseudocode
-            yield response  # Yield partial output
-            if next_token == 3:  # <END>=3 (adjust if your EOS ID differs)
-                break
-    yield response  # Final output
 def respond(message, history, max_tokens):
     """Wrapper for Gradio interface."""
     for response in generate_pseudocode(message, max_tokens):
         yield response
@@ -183,4 +206,4 @@ demo = gr.ChatInterface(
 )
 if __name__ == "__main__":
-    demo.launch()

 # Set device
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+print(f"Using device: {device}")
 # Load tokenizers
+try:
+    sp_pseudo = spm.SentencePieceProcessor(model_file="pseudo.model")
+    sp_code = spm.SentencePieceProcessor(model_file="code.model")
+    print("Tokenizers loaded successfully.")
+except Exception as e:
+    print(f"Error loading tokenizers: {e}")
+    raise
+# Load the full saved model
+model_path = "transformer_cpp_to_pseudo_30.pth"
+try:
+    model = torch.load(model_path, map_location=device, weights_only=False)
+    model.eval()
+    model = model.to(device)
+    print("Model loaded successfully.")
+except Exception as e:
+    print(f"Error loading model: {e}")
+    raise
 def generate_pseudocode(cpp_code, max_len):
     """Generate pseudocode from C++ code with streaming output."""
+    print(f"Input C++ code: {cpp_code}")
     model.eval()
+    try:
+        src_tokens = sp_code.encode_as_ids(cpp_code)
+        print(f"Source tokens: {src_tokens}")
+        src = torch.tensor([src_tokens], dtype=torch.long, device=device)
+        tgt = torch.tensor([[2]], dtype=torch.long, device=device)  # <bos_id>=2
+        generated_tokens = [2]  # Start with <START>
+        response = ""
+        with torch.no_grad():
+            for i in range(max_len):
+                output = model(src, tgt)
+                next_token = output[:, -1, :].argmax(-1).item()
+                generated_tokens.append(next_token)
+                tgt = torch.cat([tgt, torch.tensor([[next_token]], device=device)], dim=1)
+                response = sp_pseudo.decode_ids(generated_tokens)
+                print(f"Step {i}: Next token = {next_token}, Generated so far: {response}")
+                yield response  # Yield partial output
+                if next_token == 3:  # <END>=3
+                    print("EOS token detected, stopping generation.")
+                    break
+        yield response  # Final output
+    except Exception as e:
+        print(f"Error in generation: {e}")
+        yield f"Error: {e}"
 def respond(message, history, max_tokens):
     """Wrapper for Gradio interface."""
+    print(f"Received message: {message}")
     for response in generate_pseudocode(message, max_tokens):
         yield response
 )
 if __name__ == "__main__":
+    demo.launch(debug=True)  # Enable debug mode for more output