Spaces:
				
			
			
	
			
			
					
		Running
		
	
	
	
			
			
	
	
	
	
		
		
					
		Running
		
	Commit 
							
							·
						
						dad3685
	
1
								Parent(s):
							
							72241b4
								
second model ensemble
Browse files
    	
        app.py
    CHANGED
    
    | 
         @@ -3,12 +3,18 @@ from transformers import AutoTokenizer, AutoModelForSequenceClassification 
     | 
|
| 3 | 
         
             
            import torch
         
     | 
| 4 | 
         | 
| 5 | 
         
             
            model_path = "modernbert.bin"
         
     | 
| 
         | 
|
| 6 | 
         
             
            device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
         
     | 
| 7 | 
         | 
| 8 | 
         
             
            tokenizer = AutoTokenizer.from_pretrained("answerdotai/ModernBERT-base")
         
     | 
| 9 | 
         
            -
             
     | 
| 10 | 
         
            -
             
     | 
| 11 | 
         
            -
             
     | 
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 12 | 
         | 
| 13 | 
         
             
            label_mapping = {
         
     | 
| 14 | 
         
             
                0: '13B', 1: '30B', 2: '65B', 3: '7B', 4: 'GLM130B', 5: 'bloom_7b',
         
     | 
| 
         @@ -30,7 +36,11 @@ def classify_text(text): 
     | 
|
| 30 | 
         
             
                inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True).to(device)
         
     | 
| 31 | 
         | 
| 32 | 
         
             
                with torch.no_grad():
         
     | 
| 33 | 
         
            -
                     
     | 
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 34 | 
         | 
| 35 | 
         
             
                ai_probs = probabilities.clone()
         
     | 
| 36 | 
         
             
                ai_probs[24] = 0
         
     | 
| 
         @@ -53,7 +63,6 @@ def classify_text(text): 
     | 
|
| 53 | 
         
             
                return result_message
         
     | 
| 54 | 
         | 
| 55 | 
         | 
| 56 | 
         
            -
             
     | 
| 57 | 
         
             
            title = "AI Text Detector"
         
     | 
| 58 | 
         | 
| 59 | 
         
             
            description = """
         
     | 
| 
         | 
|
| 3 | 
         
             
            import torch
         
     | 
| 4 | 
         | 
| 5 | 
         
             
            model_path = "modernbert.bin"
         
     | 
| 6 | 
         
            +
            huggingface_model_url = "https://huggingface.co/mihalykiss/modernbert_2/resolve/main/Model_groups_3class_seed12"
         
     | 
| 7 | 
         
             
            device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
         
     | 
| 8 | 
         | 
| 9 | 
         
             
            tokenizer = AutoTokenizer.from_pretrained("answerdotai/ModernBERT-base")
         
     | 
| 10 | 
         
            +
             
     | 
| 11 | 
         
            +
            model_1 = AutoModelForSequenceClassification.from_pretrained("answerdotai/ModernBERT-base", num_labels=41)
         
     | 
| 12 | 
         
            +
            model_1.load_state_dict(torch.load(model_path, map_location=device))
         
     | 
| 13 | 
         
            +
            model_1.to(device).eval()
         
     | 
| 14 | 
         
            +
             
     | 
| 15 | 
         
            +
            model_2 = AutoModelForSequenceClassification.from_pretrained("answerdotai/ModernBERT-base", num_labels=41)
         
     | 
| 16 | 
         
            +
            model_2.load_state_dict(torch.hub.load_state_dict_from_url(huggingface_model_url, map_location=device))
         
     | 
| 17 | 
         
            +
            model_2.to(device).eval()
         
     | 
| 18 | 
         | 
| 19 | 
         
             
            label_mapping = {
         
     | 
| 20 | 
         
             
                0: '13B', 1: '30B', 2: '65B', 3: '7B', 4: 'GLM130B', 5: 'bloom_7b',
         
     | 
| 
         | 
|
| 36 | 
         
             
                inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True).to(device)
         
     | 
| 37 | 
         | 
| 38 | 
         
             
                with torch.no_grad():
         
     | 
| 39 | 
         
            +
                    logits_1 = model_1(**inputs).logits
         
     | 
| 40 | 
         
            +
                    logits_2 = model_2(**inputs).logits
         
     | 
| 41 | 
         
            +
             
     | 
| 42 | 
         
            +
                    avg_logits = (logits_1 + logits_2) / 2
         
     | 
| 43 | 
         
            +
                    probabilities = torch.softmax(avg_logits, dim=1)[0]
         
     | 
| 44 | 
         | 
| 45 | 
         
             
                ai_probs = probabilities.clone()
         
     | 
| 46 | 
         
             
                ai_probs[24] = 0
         
     | 
| 
         | 
|
| 63 | 
         
             
                return result_message
         
     | 
| 64 | 
         | 
| 65 | 
         | 
| 
         | 
|
| 66 | 
         
             
            title = "AI Text Detector"
         
     | 
| 67 | 
         | 
| 68 | 
         
             
            description = """
         
     |