File size: 3,556 Bytes
cbfae43
7dfd5db
9c9d121
32cf640
7dfd5db
2144fdb
3b930a2
 
 
 
771a832
2144fdb
f37ed6f
e62c0db
83b6b2f
e62c0db
 
 
4f80c95
3b930a2
93bd79b
 
 
 
 
 
 
 
 
 
 
 
 
83b6b2f
9c9d121
fc9ddd0
84443f7
87253bd
 
 
 
cac8a90
f3ba429
7a65a6f
253707d
7dfd5db
83b6b2f
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
import frontmatter
import gradio as gr
import spaces
import torch

from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline

# Model repository id and the GGUF checkpoint file to load from it.
model_id = "AverageBusinessUser/aidapal"
filename = "aidapal-8k.Q4_K_M.gguf"

print("Downloading model")

# The tokenizer and the model are both built from the same GGUF file. These
# calls run at module import time, before the UI is constructed.
tokenizer = AutoTokenizer.from_pretrained(model_id, gguf_file=filename)
model = AutoModelForCausalLM.from_pretrained(
    model_id, gguf_file=filename, device_map="auto"
)

# Wrap the loaded model and tokenizer in a text-generation pipeline; predict()
# calls this object once per request.
pipe = pipeline(task="text-generation", model=model, tokenizer=tokenizer)

# TEMPLATE """{{ .System }}
# [INST]
# {{ .Prompt }}
# [/INST]
# """
# SYSTEM """<s>[INST]You are an expert at analyzing code that has been decompiled with IDA Hex Rays into IDA Hex Rays pseudocode. As a IDA Hex Rays pseudocode analyzer, you will be provided code that may or may not have symbols and variable names. You will analyze the IDA Hex Rays pseudocode and explain exactly what each line is doing. Then you will review your analysis and determine potential name for the function and variables within the function. Your task is use your knowledge of reverse engineering, IDA Hex Rays pseudocode, and C to assist the user with analysis and reverse engineering. Provide a detailed description of the Hex Rays pseudocode to the user explaining what the code does, suggest a function name based on the analysis of the pseudocode, and new variable names based on the analysis of the code. Only respond with valid JSON using the keys 'function_name','comment', and an array 'variables'. Values should use plain ascii with no special characters.
# Analyze the following IDA Hex Rays pseudocode and generate a valid JSON object containing the keys 'function_name','comment', and an array 'variables' explaining what the code does, suggest a function name based on the analysis of the code, and new variable names based on the analysis of the code.[/INST]</s>
# """

# System prompt that predict() places at the start of every request. It is the
# same text as the SYSTEM entry in the commented-out template above, so edit
# both together. NOTE(review): presumably this wording matches what the model
# was tuned on, so it should not be "corrected" -- confirm before changing it.
system = """<s>[INST]You are an expert at analyzing code that has been decompiled with IDA Hex Rays into IDA Hex Rays pseudocode. As a IDA Hex Rays pseudocode analyzer, you will be provided code that may or may not have symbols and variable names. You will analyze the IDA Hex Rays pseudocode and explain exactly what each line is doing. Then you will review your analysis and determine potential name for the function and variables within the function. Your task is use your knowledge of reverse engineering, IDA Hex Rays pseudocode, and C to assist the user with analysis and reverse engineering. Provide a detailed description of the Hex Rays pseudocode to the user explaining what the code does, suggest a function name based on the analysis of the pseudocode, and new variable names based on the analysis of the code. Only respond with valid JSON using the keys 'function_name','comment', and an array 'variables'. Values should use plain ascii with no special characters.
Analyze the following IDA Hex Rays pseudocode and generate a valid JSON object containing the keys 'function_name','comment', and an array 'variables' explaining what the code does, suggest a function name based on the analysis of the code, and new variable names based on the analysis of the code.[/INST]</s>
"""


@spaces.GPU
def predict(code):
    """Run the model on a snippet of IDA Hex-Rays pseudocode.

    The prompt is the module-level ``system`` text followed by ``code``
    wrapped in ``[INST]`` / ``[/INST]`` markers. The prompt and its
    tokenization are printed for debugging before generation.

    Args:
        code: Pseudocode text submitted through the Gradio text input.

    Returns:
        The model's completion as a plain string, without the echoed prompt,
        so it can be shown directly in the interface's "text" output.
    """
    prompt = f"""{system}
[INST]
{code}
[/INST]
"""

    print(f"Prompt: {prompt}")
    print(f"Tokenized: {tokenizer.tokenize(prompt)}")

    # The text-generation pipeline returns a list of dicts of the form
    # [{"generated_text": ...}]. Returning that list as-is makes the UI show
    # its repr, and by default the text also repeats the whole prompt. Ask for
    # the completion only and unwrap the single result.
    generations = pipe(prompt, return_full_text=False)
    return generations[0]["generated_text"]


# The README body (the part after its front matter) doubles as the description
# shown in the UI.
readme_body = frontmatter.load("README.md").content

# Plain text in, plain text out, served by predict().
demo = gr.Interface(fn=predict, inputs="text", outputs="text", description=readme_body)
demo.launch()