Skip to content

Commit

Permalink
Added json exports for pipeline and generative model,
Browse files Browse the repository at this point in the history
to keep track of hyperparameters in upcoming experiments
  • Loading branch information
lenhoanglnh committed Jan 22, 2024
1 parent 6f7e513 commit a64804a
Show file tree
Hide file tree
Showing 29 changed files with 261 additions and 38 deletions.
4 changes: 3 additions & 1 deletion solidago/experiments/pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,10 +117,12 @@ def sample_n_scores(n_users=50, n_entities=20, n_seeds=5):
return list(sample_score(n_users, n_entities, seed) for seed in range(n_seeds))

def vary_p_trustworthy(n_users=50, n_entities=20, n_seeds=5, ps=None):
    """ Samples scores for several values of the user model's `p_trustworthy`.

    The module-level `generative_model` is temporarily modified: its
    `user_model.p_trustworthy` is set to each value of `ps` in turn, and the
    value it had on entry is restored before returning, even if sampling fails.

    Parameters
    ----------
    n_users: int
        Forwarded to `sample_n_scores`
    n_entities: int
        Forwarded to `sample_n_scores`
    n_seeds: int
        Forwarded to `sample_n_scores`
    ps: iterable of float, optional
        Values of `p_trustworthy` to try. Defaults to [0, .2, .5, .5, 1].

    Returns
    -------
    ps: the values that were tried
    results: list
        results[i] is the output of `sample_n_scores` obtained with ps[i]
    """
    if ps is None:
        # Built per call so that callers never share (and mutate) one default list.
        # NOTE(review): .5 appears twice; possibly a typo for another value - confirm.
        ps = [0, .2, .5, .5, 1]
    default_value = generative_model.user_model.p_trustworthy
    results = list()
    try:
        for p in ps:
            generative_model.user_model.p_trustworthy = p
            results.append(sample_n_scores(n_users, n_entities, n_seeds))
    finally:
        # Always restore the shared model, otherwise a failing run would leave
        # the last tried value in place for every later experiment.
        generative_model.user_model.p_trustworthy = default_value
    return ps, results


print(vary_p_trustworthy(50, 10))
2 changes: 2 additions & 0 deletions solidago/src/solidago/aggregation/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,3 +38,5 @@ def __call__(
"""
raise NotImplementedError

def to_json(self):
    """ Returns a one-element tuple holding the name of the instance's class. """
    class_name = type(self).__name__
    return (class_name,)
4 changes: 4 additions & 0 deletions solidago/src/solidago/aggregation/standardized_qrmed.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,3 +114,7 @@ def __call__(

return scaled_models, global_scores

def to_json(self):
    """ Returns the class name together with the exported hyperparameters.

    Returns
    -------
    (class name, dict with keys `dev_quantile`, `lipschitz` and `error`)
    """
    exported = ("dev_quantile", "lipschitz", "error")
    hyperparameters = {key: getattr(self, key) for key in exported}
    return type(self).__name__, hyperparameters
18 changes: 14 additions & 4 deletions solidago/src/solidago/generative_model/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,9 @@
import numpy as np
import pandas as pd

from .user_model import UserModel, SvdUserModel
from .user_model import UserModel, NormalUserModel
from .vouch_model import VouchModel, ErdosRenyiVouchModel
from .entity_model import EntityModel, SvdEntityModel
from .entity_model import EntityModel, NormalEntityModel
from .engagement_model import EngagementModel, SimpleEngagementModel
from .comparison_model import ComparisonModel, KnaryGBT

Expand All @@ -22,9 +22,9 @@
class GenerativeModel:
def __init__(
self,
user_model: UserModel = SvdUserModel(),
user_model: UserModel = NormalUserModel(svd_dimension=5),
vouch_model: VouchModel = ErdosRenyiVouchModel(),
entity_model: EntityModel = SvdEntityModel(),
entity_model: EntityModel = NormalEntityModel(svd_dimension=5),
engagement_model: EngagementModel = SimpleEngagementModel(),
comparison_model: ComparisonModel = KnaryGBT(21, 10)
):
Expand Down Expand Up @@ -99,3 +99,13 @@ def __call__(
judgments.comparisons = self.comparison_model(users, entities, judgments.comparisons)
return users, vouches, entities, privacy, judgments


def to_json(self):
    """ Collects the `to_json()` output of every sub-model.

    Returns
    -------
    dict mapping each sub-model attribute name to that sub-model's `to_json()`
    """
    components = (
        "user_model",
        "vouch_model",
        "entity_model",
        "engagement_model",
        "comparison_model",
    )
    return {name: getattr(self, name).to_json() for name in components}

8 changes: 8 additions & 0 deletions solidago/src/solidago/generative_model/comparison_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,10 @@ def __call__(

def __str__(self):
    """ Returns the name of the instance's class. """
    cls = type(self)
    return cls.__name__

def to_json(self):
    """ Returns a one-element tuple holding the name of the instance's class. """
    class_name = type(self).__name__
    return (class_name,)


class GeneralizedBradleyTerry(ComparisonModel):
""" The Generalized Bradley-Terry model is a score-to-comparison model
Expand Down Expand Up @@ -183,3 +187,7 @@ def comparison_generator(self):
def __str__(self):
    """ Returns a short description showing `n_options` and `comparison_max`. """
    description = f"K-naryGBT(K={self.n_options}, comparison_max={self.comparison_max})"
    return description

def to_json(self):
    """ Returns the class name together with the exported hyperparameters.

    Returns
    -------
    (class name, dict with keys `n_options` and `comparison_max`)
    """
    exported = ("n_options", "comparison_max")
    hyperparameters = {key: getattr(self, key) for key in exported}
    return type(self).__name__, hyperparameters
11 changes: 11 additions & 0 deletions solidago/src/solidago/generative_model/engagement_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,11 @@ def __call__(

def __str__(self):
    """ Returns the name of the instance's class. """
    cls = type(self)
    return cls.__name__

def to_json(self):
    """ Returns a one-element tuple holding the name of the instance's class. """
    class_name = type(self).__name__
    return (class_name,)


class SimpleEngagementModel(EngagementModel):
def __init__(
self,
Expand Down Expand Up @@ -101,3 +105,10 @@ def __call__(
def __str__(self):
    """ Returns a short description showing `p_per_criterion` and `p_private`. """
    described = f"p_per_criterion={self.p_per_criterion}, p_private={self.p_private}"
    return f"SimpleEngagementModel({described})"

def to_json(self):
    """ Returns the class name together with the exported hyperparameters.

    Returns
    -------
    (class name, dict with keys `p_per_criterion` and `p_private`)
    """
    exported = ("p_per_criterion", "p_private")
    hyperparameters = {key: getattr(self, key) for key in exported}
    return type(self).__name__, hyperparameters

103 changes: 103 additions & 0 deletions solidago/src/solidago/generative_model/entity_model.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,12 @@
from abc import ABC, abstractmethod
from typing import Optional, Union

import pandas as pd
import numpy as np

from inspect import getsource


class EntityModel(ABC):
@abstractmethod
def __call__(self, n_entities: int) -> pd.DataFrame:
Expand All @@ -23,6 +27,10 @@ def __call__(self, n_entities: int) -> pd.DataFrame:

def __str__(self):
    """ Returns the name of the instance's class. """
    cls = type(self)
    return cls.__name__

def to_json(self):
    """ Returns a one-element tuple holding the name of the instance's class. """
    class_name = type(self).__name__
    return (class_name,)


class SvdEntityModel(EntityModel):
def __init__(
Expand Down Expand Up @@ -70,3 +78,98 @@ def __call__(self, n_entities: int):

def __str__(self):
    """ Returns a short description showing `svd_dimension`. """
    description = f"SvdEntityModel(svd_dimension={self.svd_dimension})"
    return description

def to_json(self):
    """ Returns the class name together with the `svd_dimension` hyperparameter.

    Returns
    -------
    (class name, dict with key `svd_dimension`)
    """
    # The value must be read from the instance: a bare `svd_dimension` is not
    # defined in this scope and raises NameError on every call.
    return type(self).__name__, dict(svd_dimension=self.svd_dimension)


class SvdEntityModel(EntityModel):
    def __init__(
        self,
        svd_dimension: int=5,
        svd_distribution: Optional[callable]=None
    ):
        """ This model assumes each entity can be represented by a vector in a singular
        value decomposition. This assumes user preferences will have such a representation
        as well.

        Parameters
        ----------
        svd_dimension: int
            Dimension of the vector representation
        svd_distribution: callable
            Called without argument, returns one random vector with (at least)
            svd_dimension entries. If None, each entry is drawn from a normal
            distribution with mean 0 and standard deviation 1.
        """
        self.svd_dimension = svd_dimension
        self.svd_distribution = svd_distribution
        if svd_distribution is None:
            self.svd_distribution = lambda: np.random.normal(0, 1, svd_dimension)

    def __call__(self, n_entities: int):
        """ Generates n_entities entities, each with a random vector representation

        Parameters
        ----------
        n_entities: int
            Number of entities to generate.

        Returns
        -------
        entities: DataFrame whose index is named `entity_id`, with columns
            * for each i in range(self.svd_dimension), `svd{i}`: float
        """
        # One draw per entity; column `svd{i}` gathers the i-th coordinate of each draw.
        vectors = [self.svd_distribution() for _ in range(n_entities)]
        df = pd.DataFrame({
            f"svd{i}": [vector[i] for vector in vectors]
            for i in range(self.svd_dimension)
        })
        df.index.name = "entity_id"
        return df

    def __str__(self):
        return f"SvdEntityModel(svd_dimension={self.svd_dimension})"

    def to_json(self):
        """ Returns the class name together with the exported hyperparameters.

        The distribution is exported as its source code when it can be
        retrieved, and as the placeholder "Custom distribution" otherwise.

        Returns
        -------
        (class name, dict with keys `svd_dimension` and `svd_distribution`)
        """
        try:
            distribution = getsource(self.svd_distribution)
        except Exception:
            # Best effort: getsource raises OSError or TypeError when the source
            # is unavailable (builtins, callable objects, interactive code).
            # `Exception` keeps the fallback broad without also swallowing
            # KeyboardInterrupt or SystemExit as a bare `except:` would.
            distribution = "Custom distribution"
        return type(self).__name__, dict(
            svd_dimension=self.svd_dimension,
            svd_distribution=distribution
        )


class NormalEntityModel(SvdEntityModel):
    def __init__(self, mean: Optional[np.ndarray]=None, svd_dimension: Optional[int]=None):
        """ This model assumes each entity can be represented by a vector in a singular
        value decomposition, drawn from a normal distribution with unit standard
        deviation centered on `mean`.

        Parameters
        ----------
        mean: np.ndarray
            Center of the distribution. If None, a zero vector with
            svd_dimension entries is used.
        svd_dimension: int
            Dimension of the vector representation. Required when mean is None.
            When both are given, it must equal len(mean).
        """
        assert mean is not None or svd_dimension is not None, \
            "Either mean or svd_dimension must be provided"
        # The consistency check only makes sense when both values are given.
        # (Testing for both being None, as before, could never be reached.)
        if mean is not None and svd_dimension is not None:
            assert len(mean) == svd_dimension, \
                "mean must have svd_dimension entries"
        if mean is None:
            mean = np.zeros(svd_dimension)
        super().__init__(len(mean), lambda: np.random.normal(0, 1, len(mean)) + mean)
        self.mean = mean

    def __str__(self):
        return f"NormalEntityModel(mean={list(self.mean)})"

    def to_json(self):
        """ Returns the class name together with the mean, exported as a list. """
        return type(self).__name__, dict(mean=list(self.mean))

40 changes: 28 additions & 12 deletions solidago/src/solidago/generative_model/user_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,17 +26,19 @@ def __call__(self, n_users):
def __str__(self):
    """ Returns the name of the instance's class. """
    cls = type(self)
    return cls.__name__

class SvdUserModel(UserModel):
def to_json(self):
    """ Returns a one-element tuple holding the name of the instance's class. """
    class_name = type(self).__name__
    return (class_name,)

class NormalUserModel(UserModel):
def __init__(
self,
p_trustworthy: float = 0.8,
p_pretrusted: float = 0.2,
zipf_vouch: float = 2.0,
zipf_compare: float = 1.5,
poisson_compare: float = 30.0,
n_comparisons_per_entity: float = 3.0,
svd_dimension: int = 5,
svd_distribution: callable = lambda dim: np.random.normal(1, 1, dim)
p_trustworthy: float=0.8,
p_pretrusted: float=0.2,
zipf_vouch: float=2.0,
zipf_compare: float=1.5,
poisson_compare: float=30.0,
n_comparisons_per_entity: float=3.0,
svd_dimension: int=5,
):
""" This model assumes each user's preferences can be represented by a vector in
a singular value decomposition. This assumes entities will have such a representation
Expand All @@ -54,7 +56,6 @@ def __init__(
assert p_pretrusted >= 0 and p_pretrusted <= 1
assert zipf_vouch > 1.0 and zipf_compare > 1.0
assert poisson_compare > 0 and n_comparisons_per_entity > 0
assert len(svd_distribution(svd_dimension)) == svd_dimension

self.p_trustworthy = p_trustworthy
self.p_pretrusted = p_pretrusted
Expand All @@ -63,7 +64,11 @@ def __init__(
self.poisson_compare = poisson_compare
self.n_comparisons_per_entity = n_comparisons_per_entity
self.svd_dimension = svd_dimension
self.svd_distribution = svd_distribution
self.mean = np.zeros(svd_dimension)
self.mean[0] = 1

def svd_sample(self):
    """ Draws one random vector: standard normal noise shifted by `self.mean`.

    Returns
    -------
    np.ndarray with self.svd_dimension entries
    """
    noise = np.random.normal(loc=0, scale=1, size=self.svd_dimension)
    return noise + self.mean

def __call__(self, n_users: int):
""" Generates n_users users, with different characteristics
Expand Down Expand Up @@ -99,7 +104,7 @@ def __call__(self, n_users: int):
dct["is_pretrusted"] = (np.random.random(n_users) < self.p_pretrusted)
dct["is_pretrusted"] *= dct["is_trustworthy"]

svd = [self.svd_distribution(self.svd_dimension) for _ in range(n_users)]
svd = [self.svd_sample() for _ in range(n_users)]
for i in range(self.svd_dimension):
dct[f"svd{i}"] = [svd[u][i] for u in range(n_users)]

Expand All @@ -112,3 +117,14 @@ def __str__(self):
"zipf_compare", "poisson_compare", "n_comparisons_per_entity","svd_dimension"]
properties = ", ".join([f"{p}={getattr(self,p)}" for p in printed_properties])
return f"SvdUserModel({properties})"

def to_json(self):
    """ Returns the class name together with the exported hyperparameters.

    Returns
    -------
    (class name, dict of the hyperparameters listed below)
    """
    exported = (
        "p_trustworthy",
        "p_pretrusted",
        "zipf_vouch",
        "zipf_compare",
        "poisson_compare",
        "n_comparisons_per_entity",
        "svd_dimension",
    )
    hyperparameters = {key: getattr(self, key) for key in exported}
    return type(self).__name__, hyperparameters
4 changes: 4 additions & 0 deletions solidago/src/solidago/generative_model/vouch_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,10 @@ def __call__(self, users: pd.DataFrame):
def __str__(self):
    """ Returns the name of the instance's class. """
    cls = type(self)
    return cls.__name__

def to_json(self):
    """ Returns a one-element tuple holding the name of the instance's class. """
    class_name = type(self).__name__
    return (class_name,)


class ErdosRenyiVouchModel(VouchModel):
def __call__(self, users: pd.DataFrame):
""" Generates vouches between users
Expand Down
11 changes: 10 additions & 1 deletion solidago/src/solidago/pipeline/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,6 @@ class DefaultPipeline:
prior_std_dev=7,
convergence_error=1e-5,
cumulant_generating_function_error=1e-5,
initialization=dict()
)
scaling: Scaling = ScalingCompose(
Mehestan(
Expand Down Expand Up @@ -218,3 +217,13 @@ def __call__(

return users, voting_rights, user_models, global_model

def to_json(self):
    """ Collects the `to_json()` output of every pipeline step.

    Returns
    -------
    dict mapping each step's attribute name to that step's `to_json()`
    """
    steps = (
        "trust_propagation",
        "voting_rights",
        "preference_learning",
        "scaling",
        "aggregation",
        "post_process",
    )
    return {step: getattr(self, step).to_json() for step in steps}

3 changes: 3 additions & 0 deletions solidago/src/solidago/post_process/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,3 +29,6 @@ def __call__(
global_model: post-processed global model
"""
raise NotImplementedError

def to_json(self):
    """ Returns a one-element tuple holding the name of the instance's class. """
    class_name = type(self).__name__
    return (class_name,)
3 changes: 3 additions & 0 deletions solidago/src/solidago/post_process/squash.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,3 +42,6 @@ def __call__(
squashed_global_model = PostProcessedScoringModel(global_model, squash)

return squashed_user_models, squashed_global_model

def to_json(self):
    """ Returns the class name together with the `score_max` hyperparameter.

    Returns
    -------
    (class name, dict with key `score_max`)
    """
    hyperparameters = {"score_max": self.score_max}
    return type(self).__name__, hyperparameters
3 changes: 3 additions & 0 deletions solidago/src/solidago/preference_learning/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,9 @@ def __call__(
model: ScoringModel
"""
raise NotImplementedError

def to_json(self):
    """ Returns a one-element tuple holding the name of the instance's class. """
    class_name = type(self).__name__
    return (class_name,)


class ComparisonBasedPreferenceLearning(PreferenceLearning):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -167,9 +167,8 @@ def __init__(
self,
prior_std_dev: float=7,
comparison_max: float=10,
initialization: dict[int, float]=dict(),
convergence_error: float=1e-5,
cumulant_generating_function_error: float=1e-5
cumulant_generating_function_error: float=1e-5,
):
"""
Expand All @@ -180,7 +179,7 @@ def __init__(
error: float
tolerated error
"""
super().__init__(prior_std_dev, initialization, convergence_error)
super().__init__(prior_std_dev, convergence_error)
self.comparison_max = comparison_max
self.cumulant_generating_function_error = cumulant_generating_function_error

Expand Down Expand Up @@ -217,4 +216,11 @@ def cumulant_generating_function_second_derivative(self, score_diff: float) -> f
return (1/3) - (score_diff**2 / 15)
return 1 - (1 / np.tanh(score_diff)**2) + (1 / score_diff**2)

def to_json(self):
    """ Returns the class name together with the exported hyperparameters.

    Returns
    -------
    (class name, dict of the hyperparameters listed below)
    """
    exported = (
        "prior_std_dev",
        "comparison_max",
        "convergence_error",
        "cumulant_generating_function_error",
    )
    hyperparameters = {key: getattr(self, key) for key in exported}
    return type(self).__name__, hyperparameters

Loading

0 comments on commit a64804a

Please sign in to comment.