Spaces: Running on Zero
Upload 6 files
- .gitattributes +2 -0
- app.py +151 -0
- assets/emotion.png +0 -0
- assets/output_image.png +3 -0
- assets/output_image3.png +3 -0
- model.py +267 -0
- requirements.txt +241 -0
.gitattributes
CHANGED
@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+assets/output_image.png filter=lfs diff=lfs merge=lfs -text
+assets/output_image3.png filter=lfs diff=lfs merge=lfs -text
app.py
ADDED
@@ -0,0 +1,151 @@
import spaces
import gradio as gr
import torch
from PIL import Image
from transformers import AutoTokenizer
from diffusers import StableDiffusionXLPipeline
from huggingface_hub import hf_hub_download
from model import EmotionInjectionTransformer
from transformers import GPT2Config

device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Initialize Emotion Injection Model
config = GPT2Config.from_pretrained('gpt2')
emotion_add_method = {"a": "cross", "v": "cross"}
model = EmotionInjectionTransformer(config, final_out_type="Linear+LN").to(device)
model = torch.nn.DataParallel(model)

# Initialize Stable Diffusion XL Pipeline
pipe = StableDiffusionXLPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",
    torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
    use_safetensors=True
)
pipe.to(device)

@spaces.GPU
def generate_image(prompt, arousal, valence, model_scale, seed=24):
    # Map scales to checkpoint filenames in the Hugging Face repo
    model_checkpoints = {
        1.0: 'scale_factor_1.0.pth',
        1.25: 'scale_factor_1.25.pth',
        1.5: 'scale_factor_1.5.pth',
        1.75: 'scale_factor_1.75.pth',
        2.0: 'scale_factor_2.0.pth'
    }

    # Download the corresponding checkpoint from the Hugging Face Hub
    if model_scale in model_checkpoints:
        filename = model_checkpoints[model_scale]
        model_path = hf_hub_download(
            repo_id="idvxlab/EmotiCrafter",
            filename=filename
        )
        state_dict = torch.load(model_path, map_location=device)
        model.load_state_dict(state_dict)
    else:
        raise ValueError(f"Model scale {model_scale} not found in hosted checkpoints.")

    model.eval()

    # Encode prompt into embeddings
    (prompt_embeds_ori,
     negative_prompt_embeds,
     pooled_prompt_embeds_ori,
     negative_pooled_prompt_embeds) = pipe.encode_prompt(
        prompt=[prompt],
        prompt_2=[prompt],
        device=device,
        num_images_per_prompt=1,
        do_classifier_free_guidance=True,
        negative_prompt=None,
        negative_prompt_2=None
    )

    resolution = 1024

    with torch.no_grad():
        # Inject emotions into embeddings
        out = model(
            inputs_embeds=prompt_embeds_ori.to(torch.float32),
            arousal=torch.FloatTensor([[arousal]]).to(device),
            valence=torch.FloatTensor([[valence]]).to(device)
        )

        # Generate image with or without seed
        gen_kwargs = dict(
            prompt_embeds=out[0].to(torch.float16),
            pooled_prompt_embeds=pooled_prompt_embeds_ori,
            guidance_scale=7.5,
            num_inference_steps=40,
            height=resolution,
            width=resolution
        )
        if seed is not None:
            gen_kwargs['generator'] = torch.manual_seed(seed)

        image = pipe(**gen_kwargs).images[0]

    return image

# Gradio UI
css = """
#small-image {
    width: 50%;
    margin: 0 auto;
}
"""

def gradio_interface(prompt, arousal, valence, model_scale, seed=42):
    return generate_image(prompt, arousal, valence, model_scale, seed)

html_content = """
<div style="display: flex; justify-content: center; align-items: center; text-align: center;">
    <div>
        <h1>Emoticrafter</h1>
        <span>Emotion-based image generation using Stable Diffusion XL</span>
        <br>
        <div style="display: flex; justify-content: center; align-items: center; text-align: center;">
            <a href="http://arxiv.org/abs/2501.05710"><img src="https://img.shields.io/badge/arXiv-2407.03168-red"></a>
            <a href="https://github.com/idvxlab/EmotiCrafter"><img src="https://img.shields.io/badge/Github-Code-blue"></a>
        </div>
    </div>
</div>
"""

with gr.Blocks() as iface:
    gr.HTML(html_content)
    description = """
**You can inject emotions into pictures by adjusting the values of arousal and valence!**

The Arousal-Valence model is a two-dimensional framework used in psychology and affective computing to describe emotional states.
- **Valence**: Measures the degree of emotional pleasantness, ranging from negative (e.g., sadness, anger) to positive (e.g., happiness, satisfaction). Scale: -3 (very unpleasant) to 3 (very pleasant).
- **Arousal**: Measures the level of emotional activation, from low (e.g., calm) to high (e.g., excited). Scale: -3 (very calm) to 3 (very excited).
"""
    gr.Markdown(description)

    with gr.Row():
        with gr.Column(scale=2.25):
            gr.Markdown("<i>Arousal-Valence Model</i>")
            gr.Image("assets/emotion.png", label="Emotion Coordinate System")
        with gr.Column(scale=5):
            gr.Markdown("<i>From left to right: Valence increases</i>")
            gr.Image("assets/output_image.png", label="Valence increasing")
            gr.Markdown("<i>From left to right: Arousal increases</i>")
            gr.Image("assets/output_image3.png", label="Arousal increasing")

    with gr.Row():
        with gr.Column(scale=2.25):
            prompt = gr.Textbox(label="Prompt", placeholder="Enter the prompt for image generation")
            arousal_slider = gr.Slider(minimum=-3.0, maximum=3.0, step=0.1, label="Arousal", value=0.0)
            valence_slider = gr.Slider(minimum=-3.0, maximum=3.0, step=0.1, label="Valence", value=0.0)
            model_slider = gr.Slider(minimum=1.0, maximum=2.0, step=0.25, label="Model Scale", value=1.5)
            seed = gr.Slider(0, 10000000, step=1, label="Seed", value=42)
            submit_btn = gr.Button("Generate")

        with gr.Column(scale=5):
            output_image = gr.Image(type="pil", height=1024, width=1024)

    submit_btn.click(fn=gradio_interface, inputs=[prompt, arousal_slider, valence_slider, model_slider, seed], outputs=output_image)

if __name__ == "__main__":
    iface.launch(debug=True)
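For orientation, here is a minimal sketch of driving the Space's core function without the Gradio UI. It assumes importing `app` locally is acceptable (module import builds the SDXL pipeline, falling back to CPU without CUDA), that the `spaces` package is installed and `@spaces.GPU` acts as a pass-through outside a ZeroGPU Space, and that the checkpoint download from `idvxlab/EmotiCrafter` succeeds; the prompt and emotion values are illustrative only, not part of the Space.

```python
# Illustrative driver for app.generate_image (hypothetical example, not part of the repo).
from app import generate_image

# model_scale must match one of the hosted checkpoints: 1.0, 1.25, 1.5, 1.75, 2.0
image = generate_image(
    prompt="a quiet street at dusk",  # example prompt (made up for illustration)
    arousal=1.5,                      # -3 (very calm) .. 3 (very excited)
    valence=-2.0,                     # -3 (very unpleasant) .. 3 (very pleasant)
    model_scale=1.5,
    seed=42,
)
image.save("emoticrafter_example.png")
```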
assets/emotion.png
ADDED
assets/output_image.png
ADDED
(binary image stored with Git LFS)
assets/output_image3.png
ADDED
(binary image stored with Git LFS)
model.py
ADDED
@@ -0,0 +1,267 @@
import torch
from transformers import GPT2Model, GPT2Config
from transformers.modeling_utils import PreTrainedModel, Conv1D, prune_conv1d_layer, SequenceSummary
from transformers.models.gpt2.modeling_gpt2 import (
    GPT2Block, GPT2Attention, GPT2MLP
)
from torch import nn

# GPT-2 attention variant whose keys/values can be taken from a separate conditioning sequence z.
class Cond_Attention(GPT2Attention):
    def __init__(self, nx, n_ctx, config, is_cross_attention=False):
        super(GPT2Attention, self).__init__()
        self.output_attentions = config.output_attentions
        n_state = nx
        assert n_state % config.n_head == 0
        self.embed_dim = config.n_embd
        self.num_heads = config.n_head
        self.head_dim = self.embed_dim // self.num_heads
        self.split_size = n_state
        self.scale_attn_weights = config.scale_attn_weights
        self.is_cross_attention = is_cross_attention
        self.c_attn = Conv1D(n_state * 3, nx)
        self.c_proj = Conv1D(n_state, nx)
        self.attn_dropout = nn.Dropout(config.attn_pdrop)
        self.resid_dropout = nn.Dropout(config.resid_pdrop)
        self.pruned_heads = set()
        self.c_z = Conv1D(n_state * 2, nx)  # projects the conditioning sequence z into keys and values

    def _attn(self, query, key, value, attention_mask=None, head_mask=None):
        attn_weights = torch.matmul(query, key.transpose(-1, -2))
        if self.scale_attn_weights:
            attn_weights = attn_weights / torch.full(
                [], value.size(-1) ** 0.5, dtype=attn_weights.dtype, device=attn_weights.device
            )
        attn_weights = nn.functional.softmax(attn_weights, dim=-1)
        attn_weights = attn_weights.type(value.dtype)
        attn_weights = self.attn_dropout(attn_weights)

        if head_mask is not None:
            attn_weights = attn_weights * head_mask

        attn_output = torch.matmul(attn_weights, value)
        return attn_output, attn_weights

    def forward(self, x, z, layer_past=None, attention_mask=None, head_mask=None, use_cache=True, output_attentions=False):
        x = self.c_attn(x)
        query, key, value = x.split(self.split_size, dim=2)
        query = self._split_heads(query, self.num_heads, self.head_dim)
        key = self._split_heads(key, self.num_heads, self.head_dim)
        value = self._split_heads(value, self.num_heads, self.head_dim)
        if layer_past is not None:
            past_key, past_value = layer_past
            key = torch.cat((past_key, key), dim=-2)
            value = torch.cat((past_value, value), dim=-2)
        if use_cache:
            present = (key, value)
        else:
            present = None

        z_conv = self.c_z(z)
        key_z, value_z = z_conv.split(self.split_size, dim=2)
        key_z = self._split_heads(key_z, self.num_heads, self.head_dim)
        value_z = self._split_heads(value_z, self.num_heads, self.head_dim)

        # Cross-attention: queries come from x, keys/values from the conditioning sequence z.
        key = key_z
        value = value_z
        attn_output, attn_weights = self._attn(query, key, value, attention_mask, head_mask)
        attn_output = self._merge_heads(attn_output, self.num_heads, self.head_dim)
        attn_output = self.c_proj(attn_output)
        attn_output = self.resid_dropout(attn_output)

        outputs = (attn_output, present)
        if output_attentions:
            outputs += (attn_weights,)
        return outputs

# GPT-2 block extended with optional cross-attention branches for arousal (a) and valence (v) features.
class Cond_Block(GPT2Block):
    def __init__(self, config, activate_a=False, activate_v=False):
        super(GPT2Block, self).__init__()
        self.activate_a = activate_a
        self.activate_v = activate_v
        nx = config.n_embd
        self.ln_1 = nn.LayerNorm(nx, eps=config.layer_norm_epsilon)

        self.attn = Cond_Attention(nx, config.n_ctx, config)

        self.attn_a = None if not self.activate_a else Cond_Attention(nx, config.n_ctx, config)
        self.ln_a = None if not self.activate_a else nn.LayerNorm(nx, eps=config.layer_norm_epsilon)

        self.attn_v = None if not self.activate_v else Cond_Attention(nx, config.n_ctx, config)
        self.ln_v = None if not self.activate_v else nn.LayerNorm(nx, eps=config.layer_norm_epsilon)

        self.ln_2 = nn.LayerNorm(nx, eps=config.layer_norm_epsilon)
        self.mlp = GPT2MLP(4 * nx, config)

    def forward(self, x, a, v, layer_past=None, attention_mask=None, head_mask=None):
        residual = x
        x = self.ln_1(x)
        attn_outputs = self.attn(
            x=x,
            z=x
        )
        attn_output = attn_outputs[0]
        # outputs = attn_outputs[1:]
        x = x + attn_output
        if self.activate_a:
            x = self.ln_a(x)
            cross_attn_outputs = self.attn_a(
                x=x,
                z=a
            )
            cross_attn_output = cross_attn_outputs[0]
            x = x + cross_attn_output
        if self.activate_v:
            x = self.ln_v(x)
            cross_attn_outputs = self.attn_v(
                x=x,
                z=v
            )
            cross_attn_output = cross_attn_outputs[0]
            x = x + cross_attn_output
        m = self.mlp(self.ln_2(x))
        x = x + m
        outputs = (x,)
        return outputs

# GPT-2 backbone that projects SDXL prompt embeddings (sd_feature_dim=2048) into GPT-2 space,
# injects arousal/valence through cross-attention in every block, and maps the result back
# to the SDXL embedding space.
class EmotionInjectionTransformer(GPT2Model):
    def __init__(self, config, final_out_type="Linear+LN", sd_feature_dim=2048):
        super(GPT2Model, self).__init__(config)
        self.add_attn = True
        self.sd_feature_dim = sd_feature_dim
        self.activate_a = True
        self.activate_v = True
        self.output_hidden_states = config.output_hidden_states
        self.output_attentions = config.output_attentions
        self.use_cache = config.use_cache
        self.embed_dim = config.n_embd
        self.wte = nn.Embedding(config.vocab_size, self.embed_dim)
        self.wpe = nn.Embedding(config.n_positions, self.embed_dim)
        self.drop = nn.Dropout(config.embd_pdrop)
        self.xl_feature2gpt_feature = nn.Linear(self.sd_feature_dim, config.n_embd, bias=False)
        self.gpt_feature2xl_feature = nn.Linear(config.n_embd, self.sd_feature_dim, bias=False)
        if final_out_type == "Linear+LN" or final_out_type == "Linear+LN+noResidual":
            self.ln_xl_feature = nn.LayerNorm(self.sd_feature_dim, eps=1e-5)
        elif final_out_type == "Linear+LN+Linear" or final_out_type == "Linear+LN+Linear+noResidual":
            self.ln_xl_feature = nn.LayerNorm(self.sd_feature_dim, eps=1e-5)
            self.ff = nn.Linear(self.sd_feature_dim, self.sd_feature_dim, bias=False)
        else:
            raise NotImplementedError
        self.init_weights()
        self.cross_token = 16
        # MLPs that expand a scalar arousal/valence value into cross_token conditioning tokens.
        self.a_f = nn.Sequential(
            nn.Linear(1, 256),
            nn.ReLU(),
            nn.Linear(256, config.n_embd * self.cross_token if self.activate_a else config.n_embd)
        )
        self.v_f = nn.Sequential(
            nn.Linear(1, 256),
            nn.ReLU(),
            nn.Linear(256, config.n_embd * self.cross_token if self.activate_v else config.n_embd)
        )
        if self.add_attn:
            self.attn_proj = nn.Linear(config.n_embd, config.n_embd, bias=False)
            self.h = nn.ModuleList([Cond_Block(config, self.activate_a, self.activate_v) for _ in range(config.n_layer)])
        else:
            self.h = nn.ModuleList([GPT2Block(config) for _ in range(config.n_layer)])
        self.final_out_type = final_out_type
        self.ln_f = nn.LayerNorm(self.embed_dim, eps=config.layer_norm_epsilon)

    def forward(
        self,
        input_ids=None,
        past_key_values=None,
        attention_mask=None,
        token_type_ids=None,
        position_ids=None,
        head_mask=None,
        inputs_embeds=None,
        arousal=None,
        valence=None,
    ):
        if input_ids is not None and inputs_embeds is not None:
            raise ValueError("You cannot specify both input_ids and inputs_embeds at the same time")
        elif input_ids is not None:
            input_shape = input_ids.size()
            input_ids = input_ids.view(-1, input_shape[-1])
            batch_size = input_ids.shape[0]
        elif inputs_embeds is not None:
            input_shape = inputs_embeds.size()[:-1]
            batch_size = inputs_embeds.shape[0]
        else:
            raise ValueError("You have to specify either input_ids or inputs_embeds")

        device = input_ids.device if input_ids is not None else inputs_embeds.device

        if token_type_ids is not None:
            token_type_ids = token_type_ids.view(-1, input_shape[-1])
        if position_ids is not None:
            position_ids = position_ids.view(-1, input_shape[-1])

        if past_key_values is None:
            past_length = 0
            past_key_values = [None] * len(self.h)
        else:
            past_length = past_key_values[0][0].size(-2)
        if position_ids is None:
            position_ids = torch.arange(past_length, input_shape[-1] + past_length, dtype=torch.long, device=device)
            position_ids = position_ids.unsqueeze(0).view(-1, input_shape[-1])
        head_mask = self.get_head_mask(head_mask, self.config.n_layer)

        if inputs_embeds is None:
            inputs_embeds = self.wte(input_ids)
        else:
            residual = inputs_embeds
            inputs_embeds = self.xl_feature2gpt_feature(inputs_embeds)

        position_embeds = self.wpe(position_ids)
        hidden_states = inputs_embeds + position_embeds

        hidden_states = self.drop(hidden_states)

        a_feature = self.attn_proj(self.a_f(arousal).view(-1, self.cross_token, self.config.n_embd))
        v_feature = self.attn_proj(self.v_f(valence).view(-1, self.cross_token, self.config.n_embd))

        output_shape = input_shape + (hidden_states.size(-1),)

        all_self_attentions = () if self.output_attentions else None
        all_hidden_states = () if self.output_hidden_states else None
        for i, (block, layer_past) in enumerate(zip(self.h, past_key_values)):
            if self.output_hidden_states:
                all_hidden_states = all_hidden_states + (hidden_states,)
            outputs = block(
                hidden_states, a=a_feature, v=v_feature, layer_past=layer_past, attention_mask=attention_mask, head_mask=head_mask[i]
            )
            hidden_states = outputs[0]
            if self.output_attentions:
                all_self_attentions = all_self_attentions + (outputs[2 if self.use_cache else 1],)

        hidden_states = self.ln_f(hidden_states)

        if self.final_out_type == "Linear+LN":
            hidden_states = residual + self.ln_xl_feature(self.gpt_feature2xl_feature(hidden_states))
        elif self.final_out_type == "Linear+LN+noResidual":
            hidden_states = self.ln_xl_feature(self.gpt_feature2xl_feature(hidden_states))
        elif self.final_out_type == "Linear+LN+Linear":
            hidden_states = residual + self.ff(self.ln_xl_feature(self.gpt_feature2xl_feature(hidden_states)))
        elif self.final_out_type == "Linear+LN+Linear+noResidual":
            hidden_states = self.ff(self.ln_xl_feature(self.gpt_feature2xl_feature(hidden_states)))
        elif self.final_out_type == "Linear+noResidual":
            hidden_states = self.gpt_feature2xl_feature(hidden_states)
        else:
            hidden_states = residual + self.gpt_feature2xl_feature(hidden_states)

        if self.output_hidden_states:
            all_hidden_states = all_hidden_states + (hidden_states,)
        outputs = (hidden_states,)
        if self.output_hidden_states:
            outputs = outputs + (all_hidden_states,)
        if self.output_attentions:
            attention_output_shape = input_shape[:-1] + (-1,) + all_self_attentions[0].shape[-2:]
            all_attentions = tuple(t.view(*attention_output_shape) for t in all_self_attentions)
            outputs = outputs + (all_attentions,)

        return outputs
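A minimal sketch of a standalone forward pass through the injection model, for orientation only: random tensors stand in for the output of `pipe.encode_prompt` (app.py feeds batches of 77-token, 2048-dim SDXL prompt embeddings), and without loading one of the hosted `scale_factor_*.pth` checkpoints the weights are just the untrained initialization.

```python
import torch
from transformers import GPT2Config
from model import EmotionInjectionTransformer

config = GPT2Config.from_pretrained('gpt2')
model = EmotionInjectionTransformer(config, final_out_type="Linear+LN").eval()

# Stand-ins for pipe.encode_prompt() output: (batch, 77 tokens, 2048 dims) for SDXL.
prompt_embeds = torch.randn(1, 77, 2048)
arousal = torch.FloatTensor([[1.5]])   # scalar arousal in -3 .. 3
valence = torch.FloatTensor([[-2.0]])  # scalar valence in -3 .. 3

with torch.no_grad():
    out = model(inputs_embeds=prompt_embeds, arousal=arousal, valence=valence)

print(out[0].shape)  # expected: torch.Size([1, 77, 2048]), same shape as the input embeddings
```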
requirements.txt
ADDED
@@ -0,0 +1,241 @@
absl-py==2.1.0
accelerate
aiofiles==23.2.1
aiohttp==3.9.5
aiosignal==1.3.1
alembic==1.13.2
altair==5.4.0
annotated-types==0.7.0
antlr4-python3-runtime==4.9.3
anyio==4.4.0
astunparse==1.6.3
attrs==23.2.0
banal==1.0.6
bleach==6.1.0
blinker==1.8.2
blis==0.7.11
braceexpand==0.1.7
cachetools==5.5.0
catalogue==2.0.10
certifi==2024.6.2
cfgv==3.4.0
charset-normalizer==3.3.2
click==8.1.7
cloudpathlib==0.18.1
confection==0.1.5
contexttimer==0.3.3
contourpy==1.2.1
cycler==0.12.1
cymem==2.0.8
datasets==2.20.0
decord==0.6.0
diffusers==0.31.0
transformers==4.46.3
dill==0.3.8
distlib==0.3.8
distro==1.9.0
docker-pycreds==0.4.0
einops==0.8.0
fairscale
fastapi==0.112.0
ffmpy==0.4.0
filelock==3.15.1
Flask==3.0.3
Flask-Cors==4.0.1
flatbuffers==24.3.25
fonttools==4.53.0
frozenlist==1.4.1
fsspec==2024.5.0
ftfy==6.2.3
gast==0.6.0
gitdb==4.0.11
gitlab==1.0.2
GitPython==3.1.43
google-pasta==0.2.0
gradio==4.41.0
gradio_client==1.3.0
greenlet==3.0.3
grpcio==1.64.1
h11==0.14.0
h5py==3.11.0
httpcore==1.0.5
httpx==0.27.0
huggingface-hub==0.23.4
identify==2.6.0
idna==3.7
imageio==2.34.1
importlib_metadata==7.1.0
importlib_resources==6.4.0
invisible-watermark
iopath==0.1.10
itsdangerous==2.2.0
Jinja2==3.1.4
joblib==1.4.2
jsonlines==4.0.0
jsonschema==4.23.0
jsonschema-specifications==2023.12.1
kaggle==1.6.17
keras==3.4.1
kiwisolver==1.4.5
langcodes==3.4.0
language_data==1.2.0
lazy_loader==0.4
libclang==18.1.1
lightning-utilities==0.11.6
lmdb==1.4.1
lmdbdict==0.2.2
Mako==1.3.5
marisa-trie==1.2.0
Markdown==3.6
markdown-it-py==3.0.0
MarkupSafe==2.1.5
matplotlib==3.9.0
mdurl==0.1.2
ml-dtypes==0.4.0
modelscope==1.16.1
mpmath==1.3.0
multidict==6.0.5
multiprocess==0.70.16
murmurhash==1.0.10
namex==0.0.8
narwhals==1.4.2
networkx
nodeenv==1.9.1
numpy==1.26.4
nvidia-cublas-cu12==12.1.3.1
nvidia-cuda-cupti-cu12==12.1.105
nvidia-cuda-nvrtc-cu12==12.1.105
nvidia-cuda-runtime-cu12==12.1.105
nvidia-cudnn-cu12==8.9.2.26
nvidia-cufft-cu12==11.0.2.54
nvidia-curand-cu12==10.3.2.106
nvidia-cusolver-cu12==11.4.5.107
nvidia-cusparse-cu12==12.1.0.106
nvidia-nccl-cu12
nvidia-nvjitlink-cu12==12.5.40
nvidia-nvtx-cu12==12.1.105
omegaconf==2.3.0
openai==0.28.0
opencv-python-headless==4.5.5.64
opendatasets==0.1.22
opt-einsum==3.3.0
optree==0.12.1
orjson==3.10.7
pandas==2.2.2
peft
pillow==10.3.0
piq==0.8.0
plotly==5.23.0
portalocker==2.10.1
pre-commit==3.8.0
preshed==3.0.9
protobuf==4.25.3
psutil==6.0.0
pyarrow==17.0.0
pyarrow-hotfix==0.6
pycocoevalcap==1.2
pycocotools==2.0.8
pydantic==2.8.2
pydantic_core==2.20.1
pydeck==0.9.1
pydub==0.25.1
pyparsing==3.1.2
python-gitlab==4.6.0
python-magic==0.4.27
python-multipart==0.0.9
python-slugify==8.0.4
pytz==2024.1
PyYAML==6.0.1
referencing==0.35.1
regex==2024.5.15
requests==2.32.3
requests-toolbelt==1.0.0
rich==13.7.1
rpds-py==0.20.0
ruff==0.5.7
safetensors==0.4.3
salesforce-lavis
scikit-image==0.24.0
scikit-learn==1.5.0
scipy==1.13.1
seaborn==0.13.2
semantic-version==2.10.0
sentencepiece==0.2.0
sentry-sdk==2.10.0
setproctitle==1.3.3
shellingham==1.5.4
smart-open==7.0.4
smmap==5.0.1
sniffio==1.3.1
spacy==3.7.5
spacy-legacy==3.0.12
spacy-loggers==1.0.5
SQLAlchemy==1.4.52
srsly==2.4.8
starlette==0.37.2
streamlit==1.37.1
sympy==1.12.1
tenacity==8.5.0
tensorboard==2.17.0
tensorboard-data-server==0.7.2
tensorflow==2.17.0
tensorflow-io-gcs-filesystem==0.37.1
termcolor==2.4.0
text-unidecode==1.3
thinc==8.2.5
threadpoolctl==3.5.0
tifffile==2024.6.18
timm
tokenizers
toml==0.10.2
tomlkit==0.12.0
torch==2.2.0
torchmetrics
torchstat
torchsummary==1.5.1
torchvision
tqdm==4.66.4
triton==2.2.0
typer==0.12.3
tzdata==2024.1
urllib3==2.2.2
uvicorn==0.30.5
virtualenv==20.26.3
wandb==0.17.5
wasabi==1.1.3
watchdog==4.0.2
weasel==0.4.1
webdataset==0.2.96
webencodings==0.5.1
websockets==12.0
Werkzeug==3.0.3
wrapt==1.16.0
xformer==1.0.1
xxhash==3.4.1
yarl==1.9.4
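As a quick post-install sanity check (a sketch, assuming the pins above have already been installed in the target environment), the versions of the libraries that app.py and model.py import directly can be confirmed with:

```python
# Verify that the core runtime dependencies import cleanly and report their versions.
import torch, transformers, diffusers, gradio, huggingface_hub

print("torch          ", torch.__version__)            # pinned to 2.2.0
print("transformers   ", transformers.__version__)     # pinned to 4.46.3
print("diffusers      ", diffusers.__version__)        # pinned to 0.31.0
print("gradio         ", gradio.__version__)           # pinned to 4.41.0
print("huggingface_hub", huggingface_hub.__version__)  # pinned to 0.23.4
```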