Pratham0409 committed on
Commit 2ef2e08 · verified · 1 Parent(s): 9aebba0

Update app.py

Files changed (1):
app.py +56 -15
app.py CHANGED
@@ -1,28 +1,69 @@
  import gradio as gr
- from transformers import pipeline
+ from transformers import pipeline, AutoTokenizer

- # Load the AI detection model pipeline from Hugging Face
- # We're using a well-regarded RoBERTa-based model for this task.
- pipe = pipeline("text-classification", model="openai-community/roberta-base-openai-detector")
+ # --- MODEL LOADING ---
+ # Load both the pipeline and the tokenizer for the model
+ # The tokenizer is needed to split the text into chunks the model can understand.
+ model_name = "openai-community/roberta-base-openai-detector"
+ pipe = pipeline("text-classification", model=model_name)
+ tokenizer = AutoTokenizer.from_pretrained(model_name)

  def detect_ai_text(text):
      """
-     Analyzes the input text and returns the model's prediction.
-     The model returns a list of dictionaries. We want the one that tells us the 'AI' score.
+     Analyzes input text, handling long texts by chunking them into smaller pieces.
      """
-     results = pipe(text)
-     # The model outputs probabilities for both 'LABEL_0' (Human) and 'LABEL_1' (AI).
-     # We'll return the full results for clarity.
-     return {item['label']: item['score'] for item in results}
+     # Get the model's max length, subtracting a few tokens for safety margin.
+     max_length = tokenizer.model_max_length - 2
+
+     # Tokenize the entire input text
+     tokens = tokenizer.encode(text)
+
+     # If the text is short enough, process it in one go.
+     if len(tokens) <= max_length:
+         results = pipe(text)
+         return {item['label']: item['score'] for item in results}
+
+     # --- CHUNKING LOGIC FOR LONG TEXT ---
+     # If the text is too long, we process it in overlapping chunks.
+     all_scores = []
+
+     # Create chunks with a 50-token overlap to maintain context between them
+     for i in range(0, len(tokens), max_length - 50):
+         chunk_tokens = tokens[i:i + max_length]
+         # Decode the chunk tokens back to a string for the pipeline
+         chunk_text = tokenizer.decode(chunk_tokens)
+
+         # Run the model on the chunk
+         chunk_results = pipe(chunk_text)
+
+         # Find the score for the 'AI_GENERATED' label (LABEL_1)
+         for item in chunk_results:
+             if item['label'] == 'LABEL_1':  # LABEL_1 is the AI score
+                 all_scores.append(item['score'])
+                 break  # Move to the next chunk
+
+     # If for some reason no scores were collected, return an error state.
+     if not all_scores:
+         return {"error": "Could not process text."}
+
+     # Average the AI scores from all chunks to get a final score
+     average_ai_score = sum(all_scores) / len(all_scores)
+
+     # Return the aggregated result in the same format as a single run
+     return {
+         'LABEL_1': average_ai_score,      # AI score
+         'LABEL_0': 1 - average_ai_score,  # Human score
+         'note': f'Result aggregated from {len(all_scores)} chunks.'
+     }

- # Create the Gradio interface
+ # --- GRADIO INTERFACE ---
  iface = gr.Interface(
      fn=detect_ai_text,
-     inputs=gr.Textbox(lines=10, placeholder="Paste the text you want to analyze here..."),
+     inputs=gr.Textbox(lines=15, placeholder="Paste the text you want to analyze here..."),
      outputs="json",
-     title="AI Content Detector",
-     description="A simple API to detect AI-generated text. Powered by roberta-base-openai-detector."
+     title="AI Content Detector (Robust Version)",
+     description="This version handles long texts by breaking them into chunks. It analyzes text for AI generation using the roberta-base-openai-detector model."
  )

- # Launch the app. The `share=True` argument is what makes the API accessible.
+ # Launch the app
  iface.launch()
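
One thing worth double-checking before relying on the `item['label'] == 'LABEL_1'` comparison in the chunking loop above: a text-classification pipeline reports whatever label names the model's `id2label` config defines, and some detector checkpoints expose human-readable names (e.g. "Real"/"Fake") rather than "LABEL_0"/"LABEL_1". A minimal probe sketch, independent of app.py, to confirm the actual label strings (the probe sentence is made up):

from transformers import pipeline

pipe = pipeline("text-classification",
                model="openai-community/roberta-base-openai-detector")

# top_k=None returns a score for every label instead of only the top one,
# so the printout reveals the exact label strings the chunking loop must match.
print(pipe("A short probe sentence.", top_k=None))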
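
For completeness, a sketch of how a client might call the deployed app once the Space is running. The Space id below is a placeholder (the commit does not name the Space), and "/predict" is the default route Gradio assigns to a single gr.Interface:

from gradio_client import Client

# Placeholder Space id, not taken from the commit; substitute the real one.
client = Client("Pratham0409/ai-content-detector")

# The single positional argument maps to the Textbox input of the Interface.
result = client.predict("Paste a long passage here...", api_name="/predict")
print(result)  # dict of label scores, plus a 'note' key for chunked inputs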