Spaces:

BenjaminB
/

gistillery

Runtime error

App Files Community

Benjamin Bossan committed on May 8, 2023

Commit

1643735

1 Parent(s): 3efe4b4

Blacken

Browse files

Files changed (2) hide show

src/gistillery/ml.py +19 -6
src/gistillery/worker.py +3 -1

src/gistillery/ml.py CHANGED Viewed

@@ -32,7 +32,9 @@ class Processor(abc.ABC):
 class Summarizer(abc.ABC):
-    def __init__(self, model_name: str, model: Any, tokenizer: Any, generation_config: Any) -> None:
         raise NotImplementedError
     def get_name(self) -> str:
@@ -44,7 +46,9 @@ class Summarizer(abc.ABC):
 class Tagger(abc.ABC):
-    def __init__(self, model_name: str, model: Any, tokenizer: Any, generation_config: Any) -> None:
         raise NotImplementedError
     def get_name(self) -> str:
@@ -90,7 +94,9 @@ class MlRegistry:
 class HfTransformersSummarizer(Summarizer):
-    def __init__(self, model_name: str, model: Any, tokenizer: Any, generation_config: Any) -> None:
         self.model_name = model_name
         self.model = model
         self.tokenizer = tokenizer
@@ -101,7 +107,9 @@ class HfTransformersSummarizer(Summarizer):
     def __call__(self, x: str) -> str:
         text = self.template.format(x)
         inputs = self.tokenizer(text, return_tensors="pt")
-        outputs = self.model.generate(**inputs, generation_config=self.generation_config)
         output = self.tokenizer.batch_decode(outputs, skip_special_tokens=True)[0]
         assert isinstance(output, str)
         return output
@@ -111,7 +119,9 @@ class HfTransformersSummarizer(Summarizer):
 class HfTransformersTagger(Tagger):
-    def __init__(self, model_name: str, model: Any, tokenizer: Any, generation_config: Any) -> None:
         self.model_name = model_name
         self.model = model
         self.tokenizer = tokenizer
@@ -132,7 +142,9 @@ class HfTransformersTagger(Tagger):
     def __call__(self, x: str) -> list[str]:
         text = self.template.format(x)
         inputs = self.tokenizer(text, return_tensors="pt")
-        outputs = self.model.generate(**inputs, generation_config=self.generation_config)
         output = self.tokenizer.batch_decode(outputs, skip_special_tokens=True)[0]
         tags = self._extract_tags(output)
         return tags
@@ -171,6 +183,7 @@ class DefaultUrlProcessor(Processor):
         text = self.template.format(url=self.url, content=text)
         return text
 # class ProcessorRegistry:
 #     def __init__(self) -> None:
 #         self.registry: list[Processor] = []

 class Summarizer(abc.ABC):
+    def __init__(
+        self, model_name: str, model: Any, tokenizer: Any, generation_config: Any
+    ) -> None:
         raise NotImplementedError
     def get_name(self) -> str:
 class Tagger(abc.ABC):
+    def __init__(
+        self, model_name: str, model: Any, tokenizer: Any, generation_config: Any
+    ) -> None:
         raise NotImplementedError
     def get_name(self) -> str:
 class HfTransformersSummarizer(Summarizer):
+    def __init__(
+        self, model_name: str, model: Any, tokenizer: Any, generation_config: Any
+    ) -> None:
         self.model_name = model_name
         self.model = model
         self.tokenizer = tokenizer
     def __call__(self, x: str) -> str:
         text = self.template.format(x)
         inputs = self.tokenizer(text, return_tensors="pt")
+        outputs = self.model.generate(
+            **inputs, generation_config=self.generation_config
+        )
         output = self.tokenizer.batch_decode(outputs, skip_special_tokens=True)[0]
         assert isinstance(output, str)
         return output
 class HfTransformersTagger(Tagger):
+    def __init__(
+        self, model_name: str, model: Any, tokenizer: Any, generation_config: Any
+    ) -> None:
         self.model_name = model_name
         self.model = model
         self.tokenizer = tokenizer
     def __call__(self, x: str) -> list[str]:
         text = self.template.format(x)
         inputs = self.tokenizer(text, return_tensors="pt")
+        outputs = self.model.generate(
+            **inputs, generation_config=self.generation_config
+        )
         output = self.tokenizer.batch_decode(outputs, skip_special_tokens=True)[0]
         tags = self._extract_tags(output)
         return tags
         text = self.template.format(url=self.url, content=text)
         return text
 # class ProcessorRegistry:
 #     def __init__(self) -> None:
 #         self.registry: list[Processor] = []

src/gistillery/worker.py CHANGED Viewed

@@ -122,7 +122,9 @@ def load_mlregistry(model_name: str) -> MlRegistry:
     # increase the temperature to make the model more creative
     config_tagger.temperature = 1.5
-    summarizer = HfTransformersSummarizer(model_name, model, tokenizer, config_summarizer)
     tagger = HfTransformersTagger(model_name, model, tokenizer, config_tagger)
     registry = MlRegistry()

     # increase the temperature to make the model more creative
     config_tagger.temperature = 1.5
+    summarizer = HfTransformersSummarizer(
+        model_name, model, tokenizer, config_summarizer
+    )
     tagger = HfTransformersTagger(model_name, model, tokenizer, config_tagger)
     registry = MlRegistry()