File size: 3,468 Bytes
cb8f97e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fe853b6
 
cb8f97e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
631e35c
cb8f97e
 
 
e15f1b6
 
 
cb8f97e
e15f1b6
 
cb8f97e
 
 
 
 
e15f1b6
cb8f97e
 
 
 
 
 
e15f1b6
cb8f97e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
import gradio as gr
from huggingface_hub import hf_hub_download
import json
import gzip


# Index files served by the app, keyed by the version label used as the
# lookup key in ``usernames``. The value is the Hub revision to download;
# None leaves the revision unspecified (the hub client's default).
_INDEX_REVISIONS = (("v1.1", "v1.1"), ("v1.0", None))

# Parsed JSON index per version label, filled in once at import time.
usernames = {}

for _label, _revision in _INDEX_REVISIONS:
    filepath = hf_hub_download(
        repo_id="bigcode/the-stack-username-to-repo",
        filename="username_to_repo.json.gz",
        repo_type="dataset",
        revision=_revision,
    )
    # Text mode lets gzip perform the UTF-8 decoding before JSON parsing.
    with gzip.open(filepath, "rt", encoding="utf-8") as f:
        usernames[_label] = json.load(f)

# Introductory Markdown rendered at the top of the page. The hit limit quoted
# here must agree with the default cap applied by check_keyword (1000).
text = """\
🔍🌟AICodeFly⚡️ is a program to make retrieving github repositories fast and easy to review and to download.  Using AI to make your coding experience fast and easy.
This search engine will match your search term and find up to 1000 github repositories that match.  Each match is shown as a link that opens the repository on GitHub in a new tab.
"""

def check_username(username, version):
    """Report whether ``username`` has repositories listed for ``version``.

    Args:
        username: Key to look up in the index for the chosen version.
        version: Key into the module-level ``usernames`` mapping.

    Returns:
        A Markdown string: a "Yes" header followed by one italic line per
        listed repository, or a single "No" sentence when the user is
        missing from the index or has an empty repository list.
    """
    index = usernames[version]

    # Guard clause: unknown user, or a user with nothing listed.
    if username not in index or len(index[username]) == 0:
        return "**No**, your code is not in The Stack."

    owned = index[username]
    noun = "repository" if len(owned) == 1 else "repositories"
    header = f"**Yes**, there is code from **{len(owned)} {noun}** in The Stack:\n\n"
    listing = "".join(f"_{name}_\n\n" for name in owned)
    # strip() drops the blank lines left after the final entry.
    return (header + listing).strip()

def check_keyword(username, version, *, max_hits=1000, max_repos=70000000):
    """Search every repository entry of one Stack version for a keyword.

    Args:
        username: The keyword to search for (the parameter name is kept for
            backward compatibility). Matching is a case-sensitive substring
            test against each repository entry; surrounding whitespace in
            the keyword is ignored.
        version: Key into the module-level ``usernames`` mapping.
        max_hits: Stop as soon as this many matches have been collected.
        max_repos: Stop after this many repository entries have been
            examined. The default follows the original sizing note
            (6M user entries * up to 18 per user).

    Returns:
        A Markdown string with one link line per match, followed by a final
        status line saying whether the whole index was searched or which
        limit ended the search early. A blank keyword returns a short
        prompt instead of searching.

    Raises:
        KeyError: If ``version`` is not a key of ``usernames``.
    """
    keyword = (username or "").strip()
    if not keyword:
        # "" is a substring of every string, so a blank keyword would
        # otherwise "match" the first max_hits repositories in the index.
        return "**Please enter a keyword** to search for."

    # Collect output lines and join once instead of growing a string in the loop.
    lines = []
    hits = 0
    repos_seen = 0

    # NOTE(review): assumes usernames[version] is a dict of owner -> list of
    # "owner/repo" strings (the links below rely on that) - confirm against
    # the dataset.
    for user_index, repos in enumerate(usernames[version].values(), start=1):
        for repo in repos:
            if repos_seen >= max_repos:
                lines.append(f"**Found maximum repos**, Count: **{repos_seen}** in The Stack:")
                return "\n\n".join(lines)
            repos_seen += 1

            if keyword not in repo:
                continue

            hits += 1
            lines.append(
                f"_<a href=https://github.com/{repo} target=_blank>{repo} repocounter: {repos_seen} usercounter: {user_index}</a>_"
            )
            # ">=" so that exactly max_hits links are returned, not one extra.
            if hits >= max_hits:
                lines.append(f"**Found maximum hits**, Count: **{hits}** in The Stack:")
                return "\n\n".join(lines)

    lines.append("**Searched All Repos**, Above found in The Stack.")
    return "\n\n".join(lines)

# Page layout: one row of three columns (scales 1 / 6 / 1); only the wide
# middle column receives content, the outer two are left empty.
with gr.Blocks() as demo:
    with gr.Row():
        gr.Column(scale=1)
        colum_2 = gr.Column(scale=6)
        gr.Column(scale=1)

        with colum_2:
            gr.Markdown(text)
            version = gr.Dropdown(
                choices=["v1.1", "v1.0"],
                label="The Stack version:",
                value="v1.1",
            )
            username = gr.Text(
                value="",
                label="Keyword to match against repos e.g. BeatSaber",
            )
            check_button = gr.Button("Check!")

            # Output area that receives the Markdown returned by the search.
            repos = gr.Markdown()

            # The button runs the keyword search; check_username is the
            # exact-username lookup and is currently not wired to the UI.
            check_button.click(
                fn=check_keyword,
                inputs=[username, version],
                outputs=repos,
            )


demo.launch()