feat: added generate-nano-reps script (#84)

mistakia · Mar 25, 2024 · 8d5c877 · 8d5c877
1 parent 150b34c
commit 8d5c877
Show file tree

Hide file tree

Showing 7 changed files with 397 additions and 59 deletions.
diff --git a/common/index.mjs b/common/index.mjs
@@ -9,6 +9,7 @@ import request from './request.mjs'
 export * as cloudflare from './cloudflare.mjs'
 export { request }
 export { default as convertToCSV } from './convert-to-csv.mjs'
+export { default as read_csv } from './read-csv.mjs'
 
 const POST = (data) => ({
   method: 'POST',

diff --git a/common/read-csv.mjs b/common/read-csv.mjs
@@ -0,0 +1,14 @@
+import fs from 'fs'
+import csv from 'csv-parser'
+
+const read_csv = (filepath, options = {}) =>
+  new Promise((resolve, reject) => {
+    const results = []
+    fs.createReadStream(filepath)
+      .pipe(csv(options))
+      .on('data', (data) => results.push(data))
+      .on('error', (error) => resolve(error))
+      .on('end', () => resolve(results))
+  })
+
+export default read_csv
diff --git a/data/nano-reps.csv b/data/nano-reps.csv
diff --git a/db/schema.sql b/db/schema.sql
@@ -190,6 +190,8 @@ CREATE TABLE `representatives_meta` (
   `twitter` varchar(255) CHARACTER SET utf8 DEFAULT NULL,
   `discord` varchar(255) CHARACTER SET utf8 DEFAULT NULL,
   `github` varchar(255) CHARACTER SET utf8 DEFAULT NULL,
+  `website` varchar(255) CHARACTER SET utf8 DEFAULT NULL,
+  `email` varchar(255) CHARACTER SET utf8 DEFAULT NULL,
 
   `timestamp` int(11) NOT NULL,
   UNIQUE KEY `account` (`account`, `timestamp`)
@@ -230,6 +232,8 @@ CREATE TABLE `representatives_meta_index` (
   `twitter` varchar(255) CHARACTER SET utf8 DEFAULT NULL,
   `discord` varchar(255) CHARACTER SET utf8 DEFAULT NULL,
   `github` varchar(255) CHARACTER SET utf8 DEFAULT NULL,
+  `website` varchar(255) CHARACTER SET utf8 DEFAULT NULL,
+  `email` varchar(255) CHARACTER SET utf8 DEFAULT NULL,
 
   `timestamp` int(11) NOT NULL,
   UNIQUE KEY `account` (`account`)

diff --git a/package.json b/package.json
@@ -85,9 +85,11 @@
     "compression": "^1.7.4",
     "connected-react-router": "^6.9.3",
     "cors": "^2.8.5",
+    "csv-parser": "^3.0.0",
     "csv-writer": "^1.6.0",
     "dayjs": "^1.11.9",
     "debug": "^4.3.4",
+    "deep-diff": "^1.0.2",
     "deep-extend": "^0.6.0",
     "esm": "^3.2.25",
     "express": "^4.18.2",

diff --git a/scripts/generate-nano-reps.mjs b/scripts/generate-nano-reps.mjs
@@ -0,0 +1,217 @@
+import debug from 'debug'
+// import yargs from 'yargs'
+// import { hideBin } from 'yargs/helpers'
+import diff from 'deep-diff'
+import fetch from 'node-fetch'
+import { pipeline } from 'stream'
+import { promisify } from 'util'
+import os from 'os'
+import fs from 'fs'
+import { fileURLToPath } from 'url'
+import path, { dirname } from 'path'
+
+/* eslint-disable no-unused-vars */
+import db from '#db'
+import { isMain, read_csv, convertToCSV } from '#common'
+import { REPRESENTATIVE_TRACKING_MINIMUM_VOTING_WEIGHT } from '#constants'
+/* eslint-enable no-unused-vars */
+
+// const argv = yargs(hideBin(process.argv)).argv
+const log = debug('generate-nano-reps') // logger bound to this script's debug namespace
+debug.enable('generate-nano-reps') // enable that namespace so log() output is emitted
+
+const generate_nano_reps = async () => {
+  // Fetch nano representatives data from GitHub and save to temp file
+  const current_date = new Date().toISOString().split('T')[0]
+  const filename = `nano-reps-${current_date}.csv`
+  const csv_download_path = `${os.tmpdir()}/${filename}`
+  const stream_pipeline = promisify(pipeline)
+  const nano_reps_resp = await fetch(
+    'https://raw.githubusercontent.com/mistakia/nano-community/main/data/nano-reps.csv'
+  )
+  await stream_pipeline(
+    nano_reps_resp.body,
+    fs.createWriteStream(`${csv_download_path}`)
+  )
+  // Read the downloaded CSV data
+  const csv_reps = await read_csv(csv_download_path, {
+    mapValues: ({ header, index, value }) => {
+      if (value === '') {
+        return null
+      }
+
+      if (header === 'alias') {
+        return value.replace(/"/g, '')
+      }
+
+      return value
+    }
+  })
+
+  // Fetch representatives data from the database
+  const db_reps = await db('representatives_meta_index')
+    .leftJoin(
+      'accounts',
+      'representatives_meta_index.account',
+      'accounts.account'
+    )
+    .leftJoin(
+      'accounts_meta_index',
+      'accounts.account',
+      'accounts_meta_index.account'
+    )
+    .whereNot(
+      'representatives_meta_index.account',
+      'nano_1111111111111111111111111111111111111111111111111111hifc8npp'
+    )
+    .select(
+      'accounts.alias',
+      'accounts_meta_index.weight',
+      'representatives_meta_index.account',
+      'reddit',
+      'discord',
+      'twitter',
+      'github',
+      'website',
+      'email'
+    )
+
+  const results_index = {}
+
+  // Index database representatives by account
+  const db_reps_index = db_reps.reduce((acc, cur) => {
+    acc[cur.account] = cur
+    return acc
+  }, {})
+
+  // Index CSV representatives by account
+  const csv_reps_index = csv_reps.reduce((acc, cur) => {
+    acc[cur.account] = cur
+    return acc
+  }, {})
+
+  // Merge csv and database data, skip accounts with conflicts
+  for (const account in csv_reps_index) {
+    const nano_rep = csv_reps_index[account]
+    const db_rep = db_reps_index[account]
+    const db_rep_without_weight_field = { ...db_rep }
+    delete db_rep_without_weight_field.weight
+    const differences = diff(nano_rep, db_rep_without_weight_field)
+
+    // Filter for conflicting edits
+    const edits = differences.filter((diff) => diff.kind === 'E')
+    const conflicting_edits = edits.filter(
+      (edit) => Boolean(edit.lhs) && Boolean(edit.rhs)
+    )
+
+    if (conflicting_edits.length) {
+      log(`conflicting edits for account: ${account}`)
+      log(conflicting_edits)
+      continue
+    }
+
+    // Merge values, preferring truthy values
+    const merged_rep = {}
+    for (const key in merged_rep) {
+      if (merged_rep[key] === null) {
+        merged_rep[key] = nano_rep[key] || db_rep_without_weight_field[key]
+      }
+    }
+
+    results_index[account] = {
+      alias: nano_rep.alias || db_rep_without_weight_field.alias,
+      account: nano_rep.account || db_rep_without_weight_field.account,
+      discord: nano_rep.discord || db_rep_without_weight_field.discord || '',
+      reddit: nano_rep.reddit || db_rep_without_weight_field.reddit || '',
+      twitter: nano_rep.twitter || db_rep_without_weight_field.twitter || '',
+      website: nano_rep.website || db_rep_without_weight_field.website || '',
+      email: nano_rep.email || db_rep_without_weight_field.email || '',
+      github: nano_rep.github || db_rep_without_weight_field.github || ''
+    }
+  }
+
+  // Add missing representatives with sufficient voting weight (10,000 Nano) and at least one non-null field
+  for (const account in db_reps_index) {
+    if (
+      !results_index[account] &&
+      db_reps_index[account].weight >
+        REPRESENTATIVE_TRACKING_MINIMUM_VOTING_WEIGHT
+    ) {
+      const {
+        weight,
+        account: db_rep_account,
+        ...db_rep_without_weight_field
+      } = db_reps_index[account]
+      const has_non_null_field = Object.values(
+        db_rep_without_weight_field
+      ).some((value) => value !== null)
+      if (has_non_null_field) {
+        results_index[account] = {
+          alias: db_rep_without_weight_field.alias,
+          account: db_rep_account,
+          discord: db_rep_without_weight_field.discord || '',
+          reddit: db_rep_without_weight_field.reddit || '',
+          twitter: db_rep_without_weight_field.twitter || '',
+          website: db_rep_without_weight_field.website || '',
+          email: db_rep_without_weight_field.email || '',
+          github: db_rep_without_weight_field.github || ''
+        }
+      }
+    }
+  }
+
+  // Sort results by alias
+  const results = Object.values(results_index).sort((a, b) => {
+    const alias_a = a.alias || ''
+    const alias_b = b.alias || ''
+    return alias_a.localeCompare(alias_b)
+  })
+
+  // escape any commas in alias
+  for (const rep of results) {
+    // check if alias contains a comma
+    if (rep.alias.includes(',')) {
+      rep.alias = `"${rep.alias}"`
+    }
+  }
+
+  // Convert results to CSV and save
+  const csv_headers = {}
+  for (const field of Object.keys(results[0])) {
+    csv_headers[field] = field
+  }
+  const result_csv_data = [csv_headers, ...results]
+  const result_csv_string = JSON.stringify(result_csv_data)
+  const result_csv = convertToCSV(result_csv_string)
+  const __dirname = dirname(fileURLToPath(import.meta.url))
+  const data_path = path.join(__dirname, '../data')
+  const csv_path = path.join(data_path, 'nano-reps.csv')
+  await fs.promises.writeFile(csv_path, result_csv)
+
+  log(`wrote ${results.length} reps to ${csv_path}`)
+}
+
+const main = async () => {
+  let error
+  try {
+    await generate_nano_reps()
+  } catch (err) {
+    error = err
+    log(error)
+  }
+
+  // await db('jobs').insert({
+  //   type: constants.jobs.EXAMPLE,
+  //   succ: error ? 0 : 1,
+  //   reason: error ? error.message : null,
+  //   timestamp: Math.round(Date.now() / 1000)
+  // })
+
+  process.exit()
+}
+
+if (isMain(import.meta.url)) { // isMain comes from #common; presumably true only when this file is run directly — confirm
+  main()
+}
+
+export default generate_nano_reps
diff --git a/yarn.lock b/yarn.lock
@@ -6997,6 +6997,17 @@ __metadata:
   languageName: node
   linkType: hard
 
+"csv-parser@npm:^3.0.0":
+  version: 3.0.0
+  resolution: "csv-parser@npm:3.0.0"
+  dependencies:
+    minimist: ^1.2.0
+  bin:
+    csv-parser: bin/csv-parser
+  checksum: adc9d67d9f185249825570778c24d13004625301655330f6b735a052b9fdfbe1a239a014afb1f89939e0626ee573718f71f9f14164db7c17e4bcb2f38d6a162b
+  languageName: node
+  linkType: hard
+
 "csv-writer@npm:^1.6.0":
   version: 1.6.0
   resolution: "csv-writer@npm:1.6.0"
@@ -7243,6 +7254,13 @@ __metadata:
   languageName: node
   linkType: hard
 
+"deep-diff@npm:^1.0.2":
+  version: 1.0.2
+  resolution: "deep-diff@npm:1.0.2"
+  checksum: 9de8b5eedc1957116e1b47e4c3c4e3dbe23cb741abefc5ec8829a12e77958c689ac46888a3c35320f976cf42fb6de2b016e158facdb24d894ab5b5fdabad9b34
+  languageName: node
+  linkType: hard
+
 "deep-extend@npm:^0.6.0":
   version: 0.6.0
   resolution: "deep-extend@npm:0.6.0"
@@ -15687,9 +15705,11 @@ __metadata:
     cors: ^2.8.5
     cross-env: ^7.0.3
     css-loader: 6.8.1
+    csv-parser: ^3.0.0
     csv-writer: ^1.6.0
     dayjs: ^1.11.9
     debug: ^4.3.4
+    deep-diff: ^1.0.2
     deep-extend: ^0.6.0
     deepmerge: 4.3.1
     echarts: ^5.4.3