kardosdrur committed on
Commit
08620e1
·
1 Parent(s): 33f044a

Added corpus

Browse files
Files changed (3) hide show
  1. Dockerfile +1 -1
  2. corpus.txt +0 -0
  3. main.py +2 -8
Dockerfile CHANGED
@@ -32,6 +32,6 @@ RUN git clone https://github.com/x-tabdeveloping/topicwizard
32
  WORKDIR $HOME/app/topicwizard
33
  RUN git checkout topic-arena
34
  RUN cp $HOME/app/main.py $HOME/app/topicwizard/main.py
35
- RUN mkdir data
36
  EXPOSE 7860
37
  CMD gunicorn --timeout 0 -b 0.0.0.0:7860 --workers=2 --threads=4 --worker-class=gthread main:server
 
32
  WORKDIR $HOME/app/topicwizard
33
  RUN git checkout topic-arena
34
  RUN cp $HOME/app/main.py $HOME/app/topicwizard/main.py
35
+ RUN cp $HOME/app/corpus.txt $HOME/app/topicwizard/corpus.txt
36
  EXPOSE 7860
37
  CMD gunicorn --timeout 0 -b 0.0.0.0:7860 --workers=2 --threads=4 --worker-class=gthread main:server
corpus.txt ADDED
The diff for this file is too large to render. See raw diff
 
main.py CHANGED
@@ -29,14 +29,8 @@ def create_app(blueprint):
29
  return app
30
 
31
 
32
- print("Fetching data")
33
- newsgroups = fetch_20newsgroups(
34
- data_home="data",
35
- subset="all",
36
- remove=("headers", "footers", "quotes"),
37
- categories=["alt.atheism", "sci.space"],
38
- )
39
- corpus = newsgroups.data
40
 
41
  print("Calculating embeddings")
42
  encoder = SentenceTransformer("sentence-transformers/static-retrieval-mrl-en-v1")
 
29
  return app
30
 
31
 
32
+ with open("corpus.txt") as in_file:
33
+ corpus = in_file.read().split("\n")
 
 
 
 
 
 
34
 
35
  print("Calculating embeddings")
36
  encoder = SentenceTransformer("sentence-transformers/static-retrieval-mrl-en-v1")