# Classify Hugging Face community posts into topic categories by sending each
# post to the Llama-2-7b-chat model through the Hugging Face Inference API.
import os
from collections import Counter

import requests
from bs4 import BeautifulSoup

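# The Hugging Face API token is read from the TOKEN environment variable.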
api_token = os.environ.get("TOKEN")
API_URL = "https://api-inference.huggingface.co/models/meta-llama/Llama-2-7b-chat-hf"
headers = {"Authorization": f"Bearer {api_token}"}

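# Send one request to the Inference API and return the decoded JSON response.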
def query(payload):
    response = requests.post(API_URL, headers=headers, json=payload)
    return response.json()

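# Ask the model to label a single post. Despite the function's name, the prompt
# requests a topic category (e.g. "fine_tuning"), not a sentiment.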
def analyze_sentiment(text):
    output = query({
        "inputs": f'''<s>[INST] <<SYS>>
You are going to analyze the text I give you and tell me which category it belongs to, answering with only the label that corresponds to one of the following categories:
For posts that talk about chat models/LLM, return "Chatmodel/LLM"
For posts that talk about image generation models, return "image_generation"
For texts that ask for information from the community, return "questions"
For posts about fine-tuning or model adjustment, return "fine_tuning"
For posts related to ethics and bias in AI, return "ethics_bias"
For posts about datasets and data preparation, return "datasets"
For posts about tools and libraries, return "tools_libraries"
For posts containing tutorials and guides, return "tutorials_guides"
For posts about debugging and problem-solving, return "debugging"
Respond only with the category name, without any additional explanation or text.
<</SYS>>

Analyze the following text:
{text}
[/INST]
''',
        # Return only the newly generated text rather than echoing the prompt back.
        "parameters": {"return_full_text": False}
    })
    
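    # On success the API returns a list of {"generated_text": ...} dictionaries.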
    if isinstance(output, list) and len(output) > 0 and 'generated_text' in output[0]:
        return output[0]['generated_text'].strip().lower()
    return "unknown"

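# Fetch the Hugging Face posts page; post text is rendered in elements that
# carry the "pl-7" CSS class.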
url = 'https://huggingface.co/posts'
response = requests.get(url)

if response.status_code == 200:
    soup = BeautifulSoup(response.content, 'html.parser')
    pl7_elements = soup.find_all(class_='pl-7')
    pl7_texts = [element.text.strip() for element in pl7_elements]

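    # Classify each scraped post and tally how many fall into each category.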
    sentiment_counter = Counter()

    for text in pl7_texts:
        sentiment = analyze_sentiment(text)
        sentiment_counter[sentiment] += 1

    for category, count in sentiment_counter.items():
        print(f"{category} = {count}")
else:
    print(f"Error {response.status_code} when retrieving {url}")