tommymarto committed on
Commit
57a9e1d
·
1 Parent(s): 0d887dc

first attempt to hf spaces

Browse files
Files changed (4) hide show
  1. config/config.yaml +3 -0
  2. requirements.txt +0 -2
  3. src/demo.py +8 -4
  4. src/gradio.py +17 -0
config/config.yaml CHANGED
@@ -20,6 +20,9 @@ debug:
20
  is_debug: false
21
  force_rebuild_storage: false
22
 
 
 
 
23
 
24
  hydra:
25
  verbose: false
 
20
  is_debug: false
21
  force_rebuild_storage: false
22
 
23
+ document_parsing:
24
+ enabled: false
25
+
26
 
27
  hydra:
28
  verbose: false
requirements.txt CHANGED
@@ -35,7 +35,6 @@ cryptography==41.0.2
35
  cupy==12.1.0
36
  cycler==0.12.0
37
  cymem==2.0.7
38
- dataclasses==0.8
39
  dataclasses-json==0.5.7
40
  datasets==2.14.2
41
  debugpy==1.6.7
@@ -43,7 +42,6 @@ decorator==5.1.1
43
  defusedxml==0.7.1
44
  dill==0.3.7
45
  effdet==0.4.1
46
- en-core-web-trf==3.6.1
47
  entrypoints==0.4
48
  et-xmlfile==1.1.0
49
  exceptiongroup==1.1.2
 
35
  cupy==12.1.0
36
  cycler==0.12.0
37
  cymem==2.0.7
 
38
  dataclasses-json==0.5.7
39
  datasets==2.14.2
40
  debugpy==1.6.7
 
42
  defusedxml==0.7.1
43
  dill==0.3.7
44
  effdet==0.4.1
 
45
  entrypoints==0.4
46
  et-xmlfile==1.1.0
47
  exceptiongroup==1.1.2
src/demo.py CHANGED
@@ -55,10 +55,11 @@ class App:
55
  def __init__(self, cfg : DictConfig) -> None:
56
  self.cfg = cfg
57
 
58
- log.info("Loading: Document Loader")
59
- self.loader = hydra.utils.instantiate(cfg.document_loader)
60
- log.info("Loading: Text Splitter")
61
- self.splitter = hydra.utils.instantiate(cfg.text_splitter)
 
62
  log.info("Loading: Text Embedding Model")
63
  self.text_embedding_model = hydra.utils.instantiate(cfg.text_embedding)
64
  log.info("Loading: Vector Store")
@@ -73,6 +74,9 @@ class App:
73
  # if vector store exists, load it
74
 
75
  if not Path(self.cfg.storage_path.vector_store).exists() or self.cfg.debug.force_rebuild_storage:
 
 
 
76
  message = (
77
  "Vector store not found at %s. Building storage from scratch"
78
  if not self.cfg.debug.force_rebuild_storage
 
55
  def __init__(self, cfg : DictConfig) -> None:
56
  self.cfg = cfg
57
 
58
+ if cfg.document_parsing.enabled:
59
+ log.info("Loading: Document Loader")
60
+ self.loader = hydra.utils.instantiate(cfg.document_loader)
61
+ log.info("Loading: Text Splitter")
62
+ self.splitter = hydra.utils.instantiate(cfg.text_splitter)
63
  log.info("Loading: Text Embedding Model")
64
  self.text_embedding_model = hydra.utils.instantiate(cfg.text_embedding)
65
  log.info("Loading: Vector Store")
 
74
  # if vector store exists, load it
75
 
76
  if not Path(self.cfg.storage_path.vector_store).exists() or self.cfg.debug.force_rebuild_storage:
77
+ if not self.cfg.document_parsing.enabled:
78
+ raise ValueError("Document parsing is not enabled. Cannot build storage from scratch")
79
+
80
  message = (
81
  "Vector store not found at %s. Building storage from scratch"
82
  if not self.cfg.debug.force_rebuild_storage
src/gradio.py ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from hydra import compose, initialize
3
+ from omegaconf import OmegaConf
4
+
5
+ from demo import App
6
+
7
+ def main():
8
+ with initialize(version_base=None, config_path="../config", job_name="gradio_app"):
9
+ cfg = compose(config_name="config", overrides=["document_parsing.enabled=False"])
10
+
11
+ app = App(cfg)
12
+
13
+ webapp = gr.ChatInterface(fn=app.ask_chat, examples=["hello", "hola", "merhaba"], title="LLM4SciLit")
14
+ webapp.launch(share=True)
15
+
16
+ if __name__ == "__main__":
17
+ main()