From 5615d9616f167569ed661460cb1437e99d33fbf7 Mon Sep 17 00:00:00 2001 From: benoitqueyron <72251526+Benoit-MINT@users.noreply.github.com> Date: Wed, 27 Nov 2024 11:10:15 +0100 Subject: [PATCH 1/7] refacto process actuel de publication, sur la ressource : nb_comptes_crees --- .../cron/datagouv/account_by_month_job.rb | 19 +++++++++------- .../generate_open_data_csv_service.rb | 22 +++++-------------- 2 files changed, 17 insertions(+), 24 deletions(-) diff --git a/app/jobs/cron/datagouv/account_by_month_job.rb b/app/jobs/cron/datagouv/account_by_month_job.rb index 77a6d004531..8c1035b1bf4 100644 --- a/app/jobs/cron/datagouv/account_by_month_job.rb +++ b/app/jobs/cron/datagouv/account_by_month_job.rb @@ -3,19 +3,22 @@ class Cron::Datagouv::AccountByMonthJob < Cron::CronJob include DatagouvCronSchedulableConcern self.schedule_expression = "every month at 4:30" - FILE_NAME = "nb_comptes_crees_par_mois" + HEADERS = ["mois", "nb_comptes_crees_par_mois"] + FILE_NAME = "#{HEADERS[1]}.csv" def perform(*args) - GenerateOpenDataCsvService.save_csv_to_tmp(FILE_NAME, data) do |file| - begin - APIDatagouv::API.upload(file, :statistics_dataset) - ensure - FileUtils.rm(file) - end + GenerateOpenDataCsvService.save_csv_to_tmp(FILE_NAME, HEADERS, data) do |file| + APIDatagouv::API.upload(file, :statistics_dataset) end end + private + def data - User.where(created_at: 1.month.ago.all_month).count + [[date_last_month, User.where(created_at: 1.month.ago.all_month).count]] + end + + def date_last_month + Date.today.prev_month.strftime("%Y %B") end end diff --git a/app/services/generate_open_data_csv_service.rb b/app/services/generate_open_data_csv_service.rb index 6f111c87f51..16561976d3e 100644 --- a/app/services/generate_open_data_csv_service.rb +++ b/app/services/generate_open_data_csv_service.rb @@ -1,21 +1,11 @@ # frozen_string_literal: true class GenerateOpenDataCsvService - def self.save_csv_to_tmp(file_name, data) - f = Tempfile.create(["#{file_name}_#{date_last_month}", '.csv'], 'tmp') - f << generate_csv(file_name, data) - f.rewind - yield f if block_given? - f.close - end - - def self.date_last_month - Date.today.prev_month.strftime("%B") - end - - def self.generate_csv(file_name, data) - headers = ["mois", file_name] - data = [[date_last_month, data]] - SpreadsheetArchitect.to_csv(headers: headers, data: data) + def self.save_csv_to_tmp(file_name, headers, data) + Tempfile.create(file_name, 'tmp') do |file| + file << SpreadsheetArchitect.to_csv(headers:, data:) + file.rewind + yield file if block_given? + end end end From 662015fecc39f51385895e4bd587d62dc5a7d496 Mon Sep 17 00:00:00 2001 From: benoitqueyron <72251526+Benoit-MINT@users.noreply.github.com> Date: Wed, 27 Nov 2024 12:21:59 +0100 Subject: [PATCH 2/7] =?UTF-8?q?v=C3=A9rification=20de=20l'existence=20du?= =?UTF-8?q?=20fichier?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- app/jobs/cron/datagouv/account_by_month_job.rb | 2 ++ app/lib/api_datagouv/api.rb | 18 ++++++++++++++++++ config/env.example.optional | 1 + config/secrets.yml | 1 + 4 files changed, 22 insertions(+) diff --git a/app/jobs/cron/datagouv/account_by_month_job.rb b/app/jobs/cron/datagouv/account_by_month_job.rb index 8c1035b1bf4..0f5b503c5be 100644 --- a/app/jobs/cron/datagouv/account_by_month_job.rb +++ b/app/jobs/cron/datagouv/account_by_month_job.rb @@ -7,6 +7,8 @@ class Cron::Datagouv::AccountByMonthJob < Cron::CronJob FILE_NAME = "#{HEADERS[1]}.csv" def perform(*args) + APIDatagouv::API.check_existing_resource(:statistics_dataset, :statistics_account_by_month_resource) + GenerateOpenDataCsvService.save_csv_to_tmp(FILE_NAME, HEADERS, data) do |file| APIDatagouv::API.upload(file, :statistics_dataset) end diff --git a/app/lib/api_datagouv/api.rb b/app/lib/api_datagouv/api.rb index 0cf4e11a1be..070a4e017aa 100644 --- a/app/lib/api_datagouv/api.rb +++ b/app/lib/api_datagouv/api.rb @@ -13,6 +13,15 @@ def initialize(url, response) end class << self + def check_existing_resource(dataset, resource) + response = Typhoeus.get( + datagouv_check_url(dataset, resource), + headers: { "X-Api-Key" => datagouv_secret[:api_key] } + ) + + response.success? + end + def upload(io, dataset, resource = nil) response = Typhoeus.post( datagouv_upload_url(dataset, resource), @@ -32,6 +41,15 @@ def upload(io, dataset, resource = nil) private + def datagouv_check_url(dataset, resource) + [ + datagouv_secret[:api_url], + "/datasets/", datagouv_secret[dataset], + "/resources/", datagouv_secret[resource], + "/check/" + ].join + end + def datagouv_upload_url(dataset, resource = nil) if resource.present? [ diff --git a/config/env.example.optional b/config/env.example.optional index 050e5d49bec..21b46cc8607 100644 --- a/config/env.example.optional +++ b/config/env.example.optional @@ -173,6 +173,7 @@ OPENDATA_ENABLED="enabled" # disabled by default if `OPENDATA_ENABLED` not set DATAGOUV_API_KEY="thisisasecret" DATAGOUV_API_URL="https://www.data.gouv.fr/api/1" DATAGOUV_STATISTICS_DATASET="dataset-id1" +DATAGOUV_STATICTICS_ACOUNT_RESOURCE="resource-id1-of-dataset-id1" DATAGOUV_DESCRIPTIF_DEMARCHES_DATASET="dataset-id2" DATAGOUV_DESCRIPTIF_DEMARCHES_RESOURCE="resource-id-of-dataset-id2" diff --git a/config/secrets.yml b/config/secrets.yml index 61cd28651ca..7460d0f0bdc 100644 --- a/config/secrets.yml +++ b/config/secrets.yml @@ -81,6 +81,7 @@ defaults: &defaults descriptif_demarches_dataset: <%= ENV['DATAGOUV_DESCRIPTIF_DEMARCHES_DATASET'] %> descriptif_demarches_resource: <%= ENV['DATAGOUV_DESCRIPTIF_DEMARCHES_RESOURCE'] %> statistics_dataset: <%= ENV['DATAGOUV_STATISTICS_DATASET'] %> + statistics_account_by_month_resource: <%= ENV['DATAGOUV_STATICTICS_ACOUNT_RESOURCE'] %> development: From 7176752b346c9d45811f01bcbe3264d5cd2a2788 Mon Sep 17 00:00:00 2001 From: benoitqueyron <72251526+Benoit-MINT@users.noreply.github.com> Date: Wed, 27 Nov 2024 14:57:53 +0100 Subject: [PATCH 3/7] si pas de fichier existant, on publie un nouveau --- app/jobs/cron/datagouv/account_by_month_job.rb | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/app/jobs/cron/datagouv/account_by_month_job.rb b/app/jobs/cron/datagouv/account_by_month_job.rb index 0f5b503c5be..41040950f8a 100644 --- a/app/jobs/cron/datagouv/account_by_month_job.rb +++ b/app/jobs/cron/datagouv/account_by_month_job.rb @@ -7,17 +7,21 @@ class Cron::Datagouv::AccountByMonthJob < Cron::CronJob FILE_NAME = "#{HEADERS[1]}.csv" def perform(*args) - APIDatagouv::API.check_existing_resource(:statistics_dataset, :statistics_account_by_month_resource) + if APIDatagouv::API.check_existing_resource(:statistics_dataset, :statistics_account_by_month_resource) - GenerateOpenDataCsvService.save_csv_to_tmp(FILE_NAME, HEADERS, data) do |file| - APIDatagouv::API.upload(file, :statistics_dataset) + else + # no existing file, so we create a new file with only the data of the previous month + data = data(1) + GenerateOpenDataCsvService.save_csv_to_tmp(FILE_NAME, HEADERS, data) do |file| + APIDatagouv::API.upload(file, :statistics_dataset) + end end end private - def data - [[date_last_month, User.where(created_at: 1.month.ago.all_month).count]] + def data(n) + [[date_last_month, User.where(created_at: n.month.ago.all_month).count]] end def date_last_month From dde19afd859103567e7acf4f305519a68e9c6bf0 Mon Sep 17 00:00:00 2001 From: benoitqueyron <72251526+Benoit-MINT@users.noreply.github.com> Date: Wed, 27 Nov 2024 15:14:05 +0100 Subject: [PATCH 4/7] =?UTF-8?q?si=20fichier=20existant,=20r=C3=A9cup=C3=A9?= =?UTF-8?q?ration=20des=20donn=C3=A9es=20+=20d=C3=A9tection=20derni=C3=A8r?= =?UTF-8?q?e=20date?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../cron/datagouv/account_by_month_job.rb | 24 +++++++++++++++++-- app/lib/api_datagouv/api.rb | 21 ++++++++++++++++ 2 files changed, 43 insertions(+), 2 deletions(-) diff --git a/app/jobs/cron/datagouv/account_by_month_job.rb b/app/jobs/cron/datagouv/account_by_month_job.rb index 41040950f8a..61a5f50a74c 100644 --- a/app/jobs/cron/datagouv/account_by_month_job.rb +++ b/app/jobs/cron/datagouv/account_by_month_job.rb @@ -7,8 +7,10 @@ class Cron::Datagouv::AccountByMonthJob < Cron::CronJob FILE_NAME = "#{HEADERS[1]}.csv" def perform(*args) - if APIDatagouv::API.check_existing_resource(:statistics_dataset, :statistics_account_by_month_resource) - + if existing_resource?(:statistics_dataset, :statistics_account_by_month_resource) + # there is an existing file, so we update it + existing_data = existing_data(:statistics_dataset, :statistics_account_by_month_resource) + nb_months_to_query = nb_months_to_query(existing_data) else # no existing file, so we create a new file with only the data of the previous month data = data(1) @@ -27,4 +29,22 @@ def data(n) def date_last_month Date.today.prev_month.strftime("%Y %B") end + + def existing_resource?(dataset, resource) + APIDatagouv::API.check_existing_resource(dataset, resource) + end + + def existing_data(dataset, resource) + file_url = APIDatagouv::API.get_existing_file_url(dataset, resource) + csv_data = URI.open(file_url).read + data = [] + CSV.parse(csv_data, headers: true) do |row| + data << [row[0], row[1].to_i] + end + end + + def nb_months_to_query(existing_data) + last_date = Date.parse("#{existing_data[0][0]}-01") + (Date.today.year * 12 + Date.today.month) - (last_date.year * 12 + last_date.month) - 1 + end end diff --git a/app/lib/api_datagouv/api.rb b/app/lib/api_datagouv/api.rb index 070a4e017aa..b8c29a680bf 100644 --- a/app/lib/api_datagouv/api.rb +++ b/app/lib/api_datagouv/api.rb @@ -22,6 +22,19 @@ def check_existing_resource(dataset, resource) response.success? end + def get_existing_file_url(dataset, resource) + response = Typhoeus.get( + datagouv_get_resource_url(dataset, resource), + followlocation: true + ) + + if response.success? + JSON.parse(response.body)["url"] + else + raise RequestFailed.new(datagouv_get_resource_url(dataset, resource), response) + end + end + def upload(io, dataset, resource = nil) response = Typhoeus.post( datagouv_upload_url(dataset, resource), @@ -50,6 +63,14 @@ def datagouv_check_url(dataset, resource) ].join end + def datagouv_get_resource_url(dataset, resource) + [ + datagouv_secret[:api_url], + "/datasets/", datagouv_secret[dataset], + "/resources/", datagouv_secret[resource] + ].join + end + def datagouv_upload_url(dataset, resource = nil) if resource.present? [ From 1b3933df5d5bbfbc866b360fab1355e2cf4356c8 Mon Sep 17 00:00:00 2001 From: benoitqueyron <72251526+Benoit-MINT@users.noreply.github.com> Date: Wed, 27 Nov 2024 17:46:00 +0100 Subject: [PATCH 5/7] =?UTF-8?q?fichier=20existant,=20on=20ajoute=20les=20n?= =?UTF-8?q?ouvelles=20donn=C3=A9es?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- app/jobs/cron/datagouv/account_by_month_job.rb | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/app/jobs/cron/datagouv/account_by_month_job.rb b/app/jobs/cron/datagouv/account_by_month_job.rb index 61a5f50a74c..1f3c6240071 100644 --- a/app/jobs/cron/datagouv/account_by_month_job.rb +++ b/app/jobs/cron/datagouv/account_by_month_job.rb @@ -11,9 +11,14 @@ def perform(*args) # there is an existing file, so we update it existing_data = existing_data(:statistics_dataset, :statistics_account_by_month_resource) nb_months_to_query = nb_months_to_query(existing_data) + if nb_months_to_query > 0 + nb_months_to_query.downto(1) do + existing_data << data(n) + end + end else # no existing file, so we create a new file with only the data of the previous month - data = data(1) + data = [data(1)] GenerateOpenDataCsvService.save_csv_to_tmp(FILE_NAME, HEADERS, data) do |file| APIDatagouv::API.upload(file, :statistics_dataset) end @@ -23,11 +28,11 @@ def perform(*args) private def data(n) - [[date_last_month, User.where(created_at: n.month.ago.all_month).count]] + [date_last_month(n), User.where(created_at: n.month.ago.all_month).count] end - def date_last_month - Date.today.prev_month.strftime("%Y %B") + def date_last_month(n) + Date.today.prev_month(n).strftime("%Y %B") end def existing_resource?(dataset, resource) From 6107dd77f8ee302ecbf5c9604f2eef28d1c94859 Mon Sep 17 00:00:00 2001 From: benoitqueyron <72251526+Benoit-MINT@users.noreply.github.com> Date: Wed, 27 Nov 2024 18:03:33 +0100 Subject: [PATCH 6/7] on construit le nouveau fichier et on upload sur le fichier existant --- app/jobs/cron/datagouv/account_by_month_job.rb | 3 +++ 1 file changed, 3 insertions(+) diff --git a/app/jobs/cron/datagouv/account_by_month_job.rb b/app/jobs/cron/datagouv/account_by_month_job.rb index 1f3c6240071..3495da227a5 100644 --- a/app/jobs/cron/datagouv/account_by_month_job.rb +++ b/app/jobs/cron/datagouv/account_by_month_job.rb @@ -16,6 +16,9 @@ def perform(*args) existing_data << data(n) end end + GenerateOpenDataCsvService.save_csv_to_tmp(FILE_NAME, HEADERS, existing_data) do |file| + APIDatagouv::API.upload(file, :statistics_dataset, :statistics_account_by_month_resource) + end else # no existing file, so we create a new file with only the data of the previous month data = [data(1)] From 146d00c32c71bc6d6d37f6cd86a38cee13a17844 Mon Sep 17 00:00:00 2001 From: benoitqueyron <72251526+Benoit-MINT@users.noreply.github.com> Date: Thu, 28 Nov 2024 17:48:55 +0100 Subject: [PATCH 7/7] test : account_by_month_job_spec --- .../cron/datagouv/account_by_month_job_spec.rb | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/spec/jobs/cron/datagouv/account_by_month_job_spec.rb b/spec/jobs/cron/datagouv/account_by_month_job_spec.rb index 1b8f04e0802..7ef1a718bac 100644 --- a/spec/jobs/cron/datagouv/account_by_month_job_spec.rb +++ b/spec/jobs/cron/datagouv/account_by_month_job_spec.rb @@ -4,18 +4,22 @@ let!(:user) { create(:user, created_at: 1.month.ago) } let(:status) { 200 } let(:body) { "ok" } - let(:stub) { stub_request(:post, /https:\/\/www.data.gouv.fr\/api\/.*\/upload\//) } + # let(:stub) { stub_request(:post, /https:\/\/www.data.gouv.fr\/api\/.*\/upload\//) } describe 'perform' do - before do - stub - end + # before do + # stub + # end subject { Cron::Datagouv::AccountByMonthJob.perform_now } it 'send POST request to datagouv' do subject - expect(stub).to have_been_requested + allow(APIDatagouv::API).to receive(:upload) do |file| + csv = CSV.read(file, headers: true) + expect(csv[0]['mois']).to eq(Date.today.prev_month.strftime("%B %Y")) + end + # expect(stub).to have_been_requested end end end