From 7b29c6860226a5266474b7ed36f4cc05a7ad3f86 Mon Sep 17 00:00:00 2001 From: Jim Benton <3331+jim@users.noreply.github.com> Date: Thu, 10 Oct 2024 09:38:13 -0500 Subject: [PATCH] Add some utilities for pulling repo data (#1691) The README says it best: > This directory holds some scripts used to fetch repository data from Github and extract it in various forms. It's useful for generating a list of PRs recently merged, or a list of who has contributed, etc. > > Fetching the data requires Python and some Python libraries. --------- Co-authored-by: Michael Crismali --- repo_analysis/.gitignore | 1 + repo_analysis/Gemfile | 6 ++++++ repo_analysis/Gemfile.lock | 16 ++++++++++++++++ repo_analysis/README.md | 15 +++++++++++++++ repo_analysis/Rakefile | 29 +++++++++++++++++++++++++++++ repo_analysis/requirements.in | 2 ++ repo_analysis/sync.sh | 5 +++++ 7 files changed, 74 insertions(+) create mode 100644 repo_analysis/.gitignore create mode 100644 repo_analysis/Gemfile create mode 100644 repo_analysis/Gemfile.lock create mode 100644 repo_analysis/README.md create mode 100644 repo_analysis/Rakefile create mode 100644 repo_analysis/requirements.in create mode 100755 repo_analysis/sync.sh diff --git a/repo_analysis/.gitignore b/repo_analysis/.gitignore new file mode 100644 index 000000000..2e524bdc1 --- /dev/null +++ b/repo_analysis/.gitignore @@ -0,0 +1 @@ +github.db diff --git a/repo_analysis/Gemfile b/repo_analysis/Gemfile new file mode 100644 index 000000000..ab88720e2 --- /dev/null +++ b/repo_analysis/Gemfile @@ -0,0 +1,6 @@ +source "https://rubygems.org" +git_source(:github) { |repo| "https://github.com/#{repo}.git" } + +ruby "3.1.6" + +gem "sqlite3" diff --git a/repo_analysis/Gemfile.lock b/repo_analysis/Gemfile.lock new file mode 100644 index 000000000..bd57b8ade --- /dev/null +++ b/repo_analysis/Gemfile.lock @@ -0,0 +1,16 @@ +GEM + remote: https://rubygems.org/ + specs: + sqlite3 (2.0.3-arm64-darwin) + +PLATFORMS + arm64-darwin-23 + +DEPENDENCIES + sqlite3 + +RUBY 
# Prints a tab-separated report of pull requests to $stdout: one header row
# followed by one row per PR that is either still open (closed_at IS NULL) or
# was closed on or after the cutoff date. Open PRs are listed first, then
# closed PRs from most to least recently closed.
#
# Task arguments:
#   since - cutoff date compared against pull_requests.closed_at; defaults to
#           "2024-06-01". Example: rake "recent_prs[2024-08-01]"
#           NOTE(review): assumes closed_at holds ISO-8601 text so that string
#           comparison orders chronologically — confirm against github.db.
#
# Relies on `db`, the SQLite3::Database handle opened at the top of this Rakefile.
desc "Print a tab-separated list of PRs (still open, or closed on or after the given date, default 2024-06-01)"
task :recent_prs, [:since] do |_task, args|
  args.with_defaults(since: "2024-06-01")

  # PRs authored by this user id are left out of the report.
  # NOTE(review): looks like the dependabot[bot] account id — confirm.
  excluded_user_id = 49699333

  # Select exactly the columns the report needs, in report order, so the code
  # does not depend on the column positions of `pull_requests.*`. Values are
  # passed as bound parameters rather than embedded in the SQL text.
  query = <<~SQL
    SELECT pull_requests.title, users.name, pull_requests.merged_at, pull_requests.number
    FROM pull_requests
    LEFT JOIN users ON users.id = pull_requests.user
    WHERE (pull_requests.closed_at IS NULL OR pull_requests.closed_at >= ?)
      AND pull_requests.user != ?
    ORDER BY pull_requests.closed_at DESC NULLS FIRST
  SQL

  report = CSV.new($stdout, col_sep: "\t")
  report << %w[pr author merged_at link]
  db.execute(query, [args[:since], excluded_user_id]) do |title, author, merged_at, number|
    report << [
      title,
      author, # users.name; nil when the join finds no matching user
      merged_at,
      "https://github.com/chicago-tool-library/circulate/pull/#{number}"
    ]
  end
end