File size: 1,420 Bytes
e3af1ef |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 |
import numpy as np
from typing import Tuple, List, Optional
from sklearn.decomposition import PCA
from sklearn.linear_model import LogisticRegression
class LatentDirectionFinder:
    """
    Discover semantic directions in a latent space.

    Two strategies are offered: unsupervised principal directions (PCA) and a
    supervised direction taken from the weights of a linear classifier fitted
    on binary labels.
    """

    def __init__(self, latent_vectors: np.ndarray):
        """
        :param latent_vectors: Array of shape (N, D) where N is the number of
            samples and D is the latent dimension.
        :raises ValueError: If ``latent_vectors`` is not 2-dimensional.
        """
        latent_vectors = np.asarray(latent_vectors)
        # Fail at construction time with a clear message rather than deep
        # inside a later scikit-learn ``fit`` call.
        if latent_vectors.ndim != 2:
            raise ValueError(
                "latent_vectors must have shape (N, D); "
                f"got an array with {latent_vectors.ndim} dimension(s)"
            )
        self.latent_vectors = latent_vectors

    def pca_direction(self, n_components: int = 10) -> Tuple[np.ndarray, np.ndarray]:
        """
        Perform PCA on the latent vectors to find principal directions.

        :param n_components: Number of principal components to keep; passed
            straight to ``sklearn.decomposition.PCA``.
        :return: ``(components, explained_variance_ratio)`` -- the fitted
            ``components_`` array (one row per component) and the
            ``explained_variance_ratio_`` array (fraction of total variance
            per component, not the absolute variance).
        """
        pca = PCA(n_components=n_components)
        pca.fit(self.latent_vectors)
        return pca.components_, pca.explained_variance_ratio_

    def classifier_direction(self, labels: List[int]) -> np.ndarray:
        """
        Fit a linear classifier to find a direction separating two classes in
        latent space.

        :param labels: List of 0/1 labels, one for each latent vector.
        :return: Unit-norm direction vector of shape (D,), taken from the
            coefficient row of the fitted logistic regression.
        :raises ValueError: If ``labels`` is not one-dimensional with one
            entry per latent vector, if it does not contain exactly two
            distinct classes, or if the fitted coefficients are all zero
            (so no direction can be normalized).
        """
        label_array = np.asarray(labels)
        n_samples = len(self.latent_vectors)
        if label_array.ndim != 1 or label_array.shape[0] != n_samples:
            raise ValueError(
                f"labels must be a flat sequence of length {n_samples}; "
                f"got shape {label_array.shape}"
            )

        # ``coef_[0]`` is only "the" separating direction for a binary
        # problem; with more classes it would silently be just the first
        # class's coefficient row.
        n_classes = np.unique(label_array).size
        if n_classes != 2:
            raise ValueError(
                f"labels must contain exactly two classes; found {n_classes}"
            )

        clf = LogisticRegression()
        clf.fit(self.latent_vectors, label_array)
        direction = clf.coef_[0]

        # Guard the normalization: dividing by a zero norm would return a
        # vector of NaNs instead of signalling the failure.
        norm = np.linalg.norm(direction)
        if norm == 0:
            raise ValueError(
                "classifier coefficients are all zero; "
                "no separating direction could be determined"
            )
        return direction / norm