Spaces:

lintasmediadanawa
/

web_scrape

Sleeping

App Files Files Community

jonathanjordan21 committed on Sep 6, 2024

Commit

ab28310

verified ·

1 Parent(s): 4f2d693

Update app.py

Browse files

Files changed (1) hide show

app.py +77 -0

app.py CHANGED Viewed

@@ -6,6 +6,7 @@ import html2text
 import requests
 import httpx
 import re
 from fastapi.middleware.cors import CORSMiddleware
@@ -58,6 +59,80 @@ async def linkedin_post_details(post_id: str):
         "is_edited": edited,
         "insights": {"likeCount": likes, "commentCount": comments, "shareCount": None, "viewCount":None},
     }
 @app.get("/google_search")
@@ -86,6 +161,8 @@ async def google_search(q: str, delimiter: str = "\n---\n", sites: Annotated[lis
 @app.get("/tiktok_video_details")
 async def tiktok_video_details(username: str, video_id:str):
     url = f"https://www.tiktok.com/{username}/video/{video_id}"
     # user_agent = "LinkedInBot"
     user_agent = "Mozilla/5.0 (compatible; YandexBot/3.0; +http://yandex.com/bots)"

 import requests
 import httpx
 import re
+import json
 from fastapi.middleware.cors import CORSMiddleware
         "is_edited": edited,
         "insights": {"likeCount": likes, "commentCount": comments, "shareCount": None, "viewCount":None},
     }
+@app.get("/facebook_post_detail")
+async def fb_post_detail(username: str, post_id: str):
+    url = f"https://www.facebook.com/{username}/posts/{post_id}"
+    user_agent = "Googlebot"
+    res = requests.get(
+        url,
+        headers={
+            "user-agent": user_agent,
+            "accept-language": "en-US"
+        },
+        timeout=(10, 27),
+    )
+    soup = BeautifulSoup(res.content, "html.parser")
+    script_tags = soup.find_all("script")
+    print(len(script_tags))
+    for script_tag in script_tags:
+        try:
+            if "important_reactors" in script_tag.string:
+                splitter = '"reaction_count":{"count":'
+                total_react, reaction_split = script_tag.string.split(splitter, 2)[1].split("},", 1)
+                total_react = total_react.split(',"')[0]
+                pattern = r"\[.*?\]"
+                reactions = re.search(pattern, reaction_split)
+                if reactions:
+                    reactions = json.loads(reactions.group(0))
+                else:
+                    reactions = []
+                reactions = [
+                    dict(
+                        name=reaction["node"]["localized_name"].lower(),
+                        count=reaction["reaction_count"],
+                        is_visible=reaction["visible_in_bling_bar"],
+                    )
+                    for reaction in reactions
+                ]
+                splitter = '"share_count":{"count":'
+                shares = script_tag.string.split(splitter, 2)[1].split(",")[0]
+                splitter = '"comments":{"total_count":'
+                comments = script_tag.string.split(splitter, 2)[1].split("}")[0]
+                likes = [x.get("count") for x in reactions if x.get("name") == "like"][0]
+                print(total_react, reactions, shares, comments, likes)
+            if '"message":{"text":"' in script_tag.string:
+                desc = script_tag.string.split('"message":{"text":"', 1)[-1].split('"},')[0]
+        except Exception as e:
+            print(e)
+            continue
+    name = soup.find("meta", {"property": "og:title"}).get("content")
+    return {
+        "insights": {
+            "likeCount": likes,
+            "commentCount": comments,
+            "shareCount": shares,
+            "reactionCount": total_react,
+            "reactions": reactions,
+        },
+        "description": desc,
+        "username": username,
+        "name": name,
+        "date": None,
+    }
 @app.get("/google_search")
 @app.get("/tiktok_video_details")
 async def tiktok_video_details(username: str, video_id:str):
+    if username[0] != @:
+        username = "@" + username
     url = f"https://www.tiktok.com/{username}/video/{video_id}"
     # user_agent = "LinkedInBot"
     user_agent = "Mozilla/5.0 (compatible; YandexBot/3.0; +http://yandex.com/bots)"