Merge branch 'stable'

gitcoinco · Sep 22, 2021 · ab7ffd5 · ab7ffd5
2 parents 05eb0cf + 4705b1d
commit ab7ffd5
Show file tree

Hide file tree

Showing 3 changed files with 153 additions and 7 deletions.
diff --git a/app/app/db.py b/app/app/db.py
@@ -9,7 +9,7 @@ def db_for_read(self, model, **hints):
         Reads go to a randomly-chosen replica if backend node
         Else go to default DB
         """
-        replicas = ['read_replica_1', 'read_replica_2', 'read_replica_3']
+        replicas = ['read_replica_1', 'read_replica_2']
         return random.choice(replicas)
 
     def db_for_write(self, model, **hints):
@@ -23,7 +23,7 @@ def allow_relation(self, obj1, obj2, **hints):
         Relations between objects are allowed if both objects are
         in the primary/replica pool.
         """
-        db_set = {'default', 'read_replica_1', 'read_replica_2', 'read_replica_3'}
+        db_set = {'default', 'read_replica_1', 'read_replica_2'}
         if obj1._state.db in db_set and obj2._state.db in db_set:
             return True
         return True # TODO: be more stringent about this IFF we ever have a situation in which diff tables are on diff DBs

diff --git a/app/app/settings.py b/app/app/settings.py
@@ -212,7 +212,6 @@
         'default': env.db(),
         'read_replica_1': env.db('READ_REPLICA_1_DATABASE_URL'),
         'read_replica_2': env.db('READ_REPLICA_2_DATABASE_URL'),
-        'read_replica_3': env.db('READ_REPLICA_3_DATABASE_URL')
         }
     DATABASE_ROUTERS = ['app.db.PrimaryDBRouter']
 
@@ -772,7 +771,7 @@ def callback(request):
 S3_REPORT_BUCKET = env('S3_REPORT_BUCKET', default='')  # TODO
 S3_REPORT_PREFIX = env('S3_REPORT_PREFIX', default='')  # TODO
 
-S3_BSCI_SYBIL_BUCKET = env('S3_REPORT_BUCKET', default='')  # TODO
+S3_BSCI_SYBIL_BUCKET = env('S3_BSCI_SYBIL_BUCKET', default='')  # TODO
 
 INSTALLED_APPS += env.list('DEBUG_APPS', default=[])
 

diff --git a/app/grants/clr.py b/app/grants/clr.py
@@ -23,7 +23,147 @@
 from django.utils import timezone
 
 import numpy as np
-from grants.clr_data_src import fetch_grants, fetch_summed_contributions
+from grants.clr_data_src import fetch_contributions, fetch_grants
+
+
+def populate_data_for_clr(grants, contributions, clr_round):
+    '''
+        Build, for each grant, the per-profile summed contributions needed to calculate CLR
+
+        Args:
+            grants                  : iterable of grants; each is read for `id` and `defer_clr_to`
+            contributions           : contributions for those grants; must support
+                                      `.filter(...).prefetch_related(...)`
+                                      (presumably a Django QuerySet -- confirm against caller)
+            clr_round               : GrantCLR; read for `start_date`, `end_date` and
+                                      `contribution_multiplier`
+
+        Returns:
+            contrib_data_list: list of {
+                'id': grant_id (pk of grant.defer_clr_to when set, else grant.id),
+                'contributions': [
+                    {
+                        'id': str(profile_id),
+                        'sum_of_each_profiles_contributions': summed amount * contribution_multiplier,
+                        'profile_trust_bonus': trust_bonus
+                    }
+                ]
+            }
+
+            Grants without any contribution that has a `profile_for_clr` are left out.
+            An empty list is returned (after printing an error) when clr_round is falsy.
+
+    '''
+
+    contrib_data_list = []
+
+    # without a round there is no date window or multiplier to work with
+    if not clr_round:
+        print('Error: populate_data_for_clr - missing clr_round')
+        return contrib_data_list
+
+    clr_start_date = clr_round.start_date
+    clr_end_date = clr_round.end_date
+
+    # NOTE(review): `mechanism` is assigned but never read in this function
+    mechanism="profile"
+
+    # 3-4s to get all the contributions
+    # only contributions created inside the round window (both bounds inclusive) are kept
+    _contributions = list(contributions.filter(created_on__gte=clr_start_date, created_on__lte=clr_end_date).prefetch_related('profile_for_clr', 'subscription'))
+    # group the contributions by the 'id' found in their normalized_data;
+    # the groups are looked up by grant.id in the loop below
+    _contributions_by_id = {}
+    for ele in _contributions:
+        key = ele.normalized_data.get('id')
+        if key not in _contributions_by_id.keys():
+            _contributions_by_id[key] = []
+        _contributions_by_id[key].append(ele)
+
+    # set up data to load contributions for each grant
+    for grant in grants:
+        # the result is reported under the deferred-to grant when one is set
+        grant_id = grant.defer_clr_to.pk if grant.defer_clr_to else grant.id
+
+        # contributions (looked up by the grant's own id, not by grant_id)
+        contribs = _contributions_by_id.get(grant.id, [])
+
+        # create arrays
+        contributing_profile_ids = []
+        contributions_by_id = {}
+        for c in contribs:
+            prof = c.profile_for_clr
+            # contributions without a profile_for_clr are ignored
+            if prof:
+                key = prof.id
+                if key not in contributions_by_id.keys():
+                    contributions_by_id[key] = []
+                contributions_by_id[key].append(c)
+                contributing_profile_ids.append((prof.id, prof.trust_bonus))
+
+        # de-duplicate the (profile id, trust bonus) pairs; set iteration order is not guaranteed
+        contributing_profile_ids = list(set(contributing_profile_ids))
+
+        summed_contributions = []
+
+        # contributions
+        if len(contributing_profile_ids) > 0:
+            for profile_id, trust_bonus in contributing_profile_ids:
+                # total of this profile's 'amount_per_period_usdt' values for the grant, scaled by the round multiplier
+                sum_of_each_profiles_contributions = sum(ele.normalized_data.get('amount_per_period_usdt') for ele in contributions_by_id[profile_id]) * float(clr_round.contribution_multiplier)
+
+                summed_contributions.append({
+                    'id': str(profile_id),
+                    'sum_of_each_profiles_contributions': sum_of_each_profiles_contributions,
+                    'profile_trust_bonus': trust_bonus
+                })
+
+            contrib_data_list.append({
+                'id': grant_id,
+                'contributions': summed_contributions
+            })
+
+    return contrib_data_list
+
+
+def translate_data(grants_data):
+    '''
+        translates django grant data structure to a list of lists
+
+        args:
+            django grant data structure (iterable of)
+                {
+                    'id': grant_id,
+                    'contributions' : [
+                        {
+                            'id': profile_id,
+                            'sum_of_each_profiles_contributions': summed_contributions,
+                            'profile_trust_bonus': trust_bonus
+                        }
+                    ]
+                }
+
+        returns:
+            list of lists of grant data
+                [[grant_id, profile_id, summed_contributions]]
+            dictionary of profile_ids and trust bonuses
+                {profile_id: trust_bonus}
+    '''
+    trust_dict = {}
+    grants_list = []
+    for g in grants_data:
+        grant_id = g.get('id')
+        # NOTE: an entry without a 'contributions' key yields None here, and iterating None raises TypeError
+        for c in g.get('contributions'):
+            profile_id = c.get('id')
+            trust_bonus = c.get('profile_trust_bonus')
+            # entries with a missing or falsy 'id' are skipped
+            if profile_id:
+                val = [grant_id] + [c.get('id')] + [c.get('sum_of_each_profiles_contributions')]
+                grants_list.append(val)
+                # when a profile shows up under several grants, the last trust bonus seen is kept
+                trust_dict[profile_id] = trust_bonus
+
+    return grants_list, trust_dict
+
+
+def aggregate_contributions(grant_contributions):
+    '''
+        aggregates contributions by grant and contributor, summing the amounts of each (grant, user) pair
+
+        args:
+            list of lists of grant data; every row must unpack into exactly three values
+                [[grant_id, user_id, contribution_amount]]
+
+        returns:
+            aggregated contributions by pair nested dict
+                {
+                    grant_id: {
+                        user_id: aggregated_amount
+                    }
+                }
+    '''
+    contrib_dict = {}
+    for proj, user, amount in grant_contributions:
+        # first contribution seen for this grant: start its per-user mapping
+        if proj not in contrib_dict:
+            contrib_dict[proj] = {}
+        # add to the running total of this user for this grant (starts at 0)
+        contrib_dict[proj][user] = contrib_dict[proj].get(user, 0) + amount
+
+    return contrib_dict
 
 
 def get_totals_by_pair(contrib_dict):
@@ -235,8 +375,15 @@ def predict_clr(save_to_db=False, from_date=None, clr_round=None, network='mainn
     print(f"- starting fetch_grants at {round(time.time(),1)}")
     grants = fetch_grants(clr_round, network)
 
-    print(f"- starting get data and sum at {round(time.time(),1)}")
-    curr_agg, trust_dict = fetch_summed_contributions(grants, clr_round, network)
+    print(f"- starting fetch_contributions at {round(time.time(),1)}")
+    contributions = fetch_contributions(clr_round, network)
+
+    print(f"- starting sum (of {contributions.count()} contributions) at {round(time.time(),1)}")
+    grant_contributions_curr = populate_data_for_clr(grants, contributions, clr_round)
+    curr_round, trust_dict = translate_data(grant_contributions_curr)
+
+    # this aggregates the data into the expected format
+    curr_agg = aggregate_contributions(curr_round)
 
     if len(curr_agg) == 0:
         print(f'- done - no Contributions for CLR {clr_round.round_num}. Exiting')