Spaces:
				
			
			
	
			
			
		Running
		
			on 
			
			Zero
	
	
	
			
			
	
	
	
	
		
		
		Running
		
			on 
			
			Zero
	Create motif_tagging.py
Browse files- motif_tagging.py +37 -0
 
    	
        motif_tagging.py
    ADDED
    
    | 
         @@ -0,0 +1,37 @@ 
     | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
| 
         | 
|
| 1 | 
         
            +
            # motif_tagging.py
         
     | 
| 2 | 
         
            +
             
     | 
| 3 | 
         
            +
            import re
         
     | 
| 4 | 
         
            +
             
     | 
| 5 | 
         
            +
            # Regular-expression patterns grouped by motif label.
            #
            # Keys are the motif labels; each value is a list of regex source strings
            # that indicate that motif.
            #
            # Contractions are written with ``['’]?`` so that a straight apostrophe,
            # a typographic apostrophe (U+2019), and a dropped apostrophe all match;
            # real-world text mixes all three.
            MOTIF_PATTERNS = {
                "physical_threat": [
                    r"\b(i['’]?ll|i am going to) (hurt|kill|break|end|ruin|destroy) you\b",
                    # The space lives inside the optional group so that
                    # "say goodbye to kneecaps" needs only a single space.
                    r"\bsay goodbye to ((your|those) )?(kneecaps|teeth|face)\b",
                    r"\bi['’]?ll put you in a (grave|hole|rose garden)\b",
                    r"\b(sleep with one eye open|you['’]?ll see what happens)\b",
                    r"\bi['’]?ll make you disappear\b",
                ],
                "extreme_control": [
                    r"\bi decide who you (see|talk to|text|spend time with)\b",
                    r"\byou['’]?re not allowed to\b",
                    r"\byou don['’]?t get to (leave|say no|argue)\b",
                    r"\bi own you\b",
                ],
                "suicidal_threat": [
                    r"\bi['’]?ll kill myself\b",
                    r"\bi don['’]?t want to live if you leave\b",
                    r"\bi['’]?ll die without you\b",
                    r"\byou['’]?ll regret it when i['’]?m gone\b",
                ],
            }
         
     | 
| 26 | 
         
            +
             
     | 
| 27 | 
         
            +
            def tag_motifs(text):
                """Tag *text* with every motif whose patterns match it.

                Each pattern in ``MOTIF_PATTERNS`` is searched for in *text*,
                ignoring case.

                Args:
                    text: The string to scan. ``None`` or an empty string yields
                        an empty result instead of raising.

                Returns:
                    A ``(tags, matches)`` tuple. ``tags`` is a list of the distinct
                    motif labels that matched, in ``MOTIF_PATTERNS`` iteration order.
                    ``matches`` is a list of ``(label, pattern)`` tuples, one per
                    pattern that matched.
                """
                # Nothing can match empty input, and re.search() rejects None.
                if not text:
                    return [], []

                tags = []
                matches = []

                for label, patterns in MOTIF_PATTERNS.items():
                    hits = [
                        pattern
                        for pattern in patterns
                        if re.search(pattern, text, flags=re.IGNORECASE)
                    ]
                    if hits:
                        # Each label is visited once, so appending here keeps the
                        # labels unique and in a stable order; converting a set of
                        # strings to a list can change order from run to run.
                        tags.append(label)
                        matches.extend((label, pattern) for pattern in hits)

                return tags, matches
         
     |