Spaces:

Anvilogic
/

T5-Typosquat-Detect

Sleeping

File size: 1,662 Bytes

64aa5df
 
 
 
 
 
 
 
 
 
 
 
 
 
480c8da
64aa5df
 
 
4e4710d
64aa5df
 
 
 
 
4e4710d
 
64aa5df
4e4710d
64aa5df
4e4710d
 
64aa5df
 
4e4710d
 
64aa5df
 
4e4710d
64aa5df
 
4e4710d
480c8da
64aa5df
 
4e4710d

import streamlit as st
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
from peft import PeftModel
import torch

@st.cache_resource
def load_model():
    """Build the typosquat-detection model and its tokenizer.

    Loads the tokenizer and the seq2seq base model from the
    ``google/flan-t5-large`` checkpoint, wraps the base model with the PEFT
    adapter found at ``./Flan-T5-Typosquat-detect``, calls
    ``merge_and_unload()`` on the wrapped model and puts the result in eval
    mode.

    The function is decorated with ``st.cache_resource`` so Streamlit can
    reuse the loaded objects instead of rebuilding them on every script
    rerun.

    Returns:
        tuple: ``(model, tokenizer)`` in that order.
    """
    base_checkpoint = "google/flan-t5-large"
    # Adjust to your saved adapter path
    adapter_dir = "./Flan-T5-Typosquat-detect"

    # Tokenizer first, then the base weights (same order as the checkpoint loads)
    tok = AutoTokenizer.from_pretrained(base_checkpoint)
    base_model = AutoModelForSeq2SeqLM.from_pretrained(base_checkpoint)

    # Attach the adapter and immediately merge it into the base model
    merged = PeftModel.from_pretrained(base_model, adapter_dir).merge_and_unload()
    merged.eval()

    return merged, tok
# Device the encoded prompt is moved to before generation. Nothing in this
# script moves the model itself to another device.
device = 'cpu'
model, tokenizer = load_model()

st.title("FLAN-T5 Typosquatting Detection")
st.write("Enter a potential typosquatted domain and a target domain to check if one is a variant of the other.")

# Instruction text placed in front of the two domains in the model prompt.
prompt_prefix = "Is the first domain a typosquat of the second:"

# Strip surrounding whitespace so a whitespace-only entry is treated as empty
# and pasted domains do not carry stray spaces into the prompt.
potential_typosquat = st.text_input("Potential Typosquatted Domain", value="lonlonsoft.com").strip()
target_domain = st.text_input("Legitimate Domain", value="stiltsoft.net").strip()

# Prompt layout: "<prefix> <candidate domain> <legitimate domain>"
full_prompt = f"{prompt_prefix} {potential_typosquat} {target_domain}"

if st.button("Check Typosquatting"):
    if potential_typosquat and target_domain:
        # Encode and generate response
        input_ids = tokenizer(full_prompt, return_tensors="pt").input_ids.to(device)
        outputs = model.generate(input_ids, max_new_tokens=20)

        # Decode the response (first and only sequence in the batch)
        prediction = tokenizer.decode(outputs[0], skip_special_tokens=True)

        # Display the result. There must be no space before the closing `**`,
        # otherwise Markdown shows the asterisks literally instead of bold text.
        st.write("**Prediction:**")
        st.write(prediction)
    else:
        st.warning("Please enter both domains to perform the check.")