Skip to content

Commit

Permalink
Merge pull request #124 from meaningfy-ws/feature/TDA-284
Browse files (browse the repository at this point in the history)
Update reprocess_unnormalised_notices_from_backlog.py
  • Loading branch information
CaptainOfHacks authored Oct 25, 2023
2 parents (8421113 + f0d1751); commit 8e87190
Show file tree
Hide file tree
Showing 8 changed files with 20 additions and 15 deletions.
4 changes: 1 addition & 3 deletions .github/workflows/unit-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -26,9 +26,7 @@ jobs:
- name: Install dependencies
run: |
sudo apt-get install -y libssl-dev libcurl4-openssl-dev
python -m pip install --upgrade setuptools pip wheel
make install
make install-dev
python -m pip install --upgrade setuptools pip wheel tox~=4.11.3
- name: Make envfile
uses: SpicyPizza/create-envfile@v1
with:
Expand Down
3 changes: 2 additions & 1 deletion dags/reprocess_unnormalised_notices_from_backlog.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,8 @@ def reprocess_unnormalised_notices_from_backlog():
def select_all_raw_notices():
start_date = get_dag_param(key=START_DATE_DAG_PARAM)
end_date = get_dag_param(key=END_DATE_DAG_PARAM)
notice_ids = notice_ids_selector_by_status(notice_statuses=[NoticeStatus.RAW], start_date=start_date,
notice_ids = notice_ids_selector_by_status(notice_statuses=[NoticeStatus.RAW, NoticeStatus.INDEXED],
start_date=start_date,
end_date=end_date)
push_dag_downstream(key=NOTICE_IDS_KEY, value=notice_ids)

Expand Down
14 changes: 7 additions & 7 deletions requirements.dev.txt
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
coverage~=6.3.1
pytest~=7.0.0
pytest-bdd~=5.0.0
pytest-cov~=3.0.0
pytest-subtests~=0.6.0
tox~=3.24.5
coverage~=7.3.2
pytest~=7.4.3
pytest-bdd~=7.0.0
pytest-cov~=4.1.0
pytest-subtests~=0.11.0
tox~=4.11.3
tox-pytest-summary~=0.1.2
mongomock==4.0.0
mongomock~=4.1.2
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
git+https://github.com/OP-TED/ted-rdf-conversion-pipeline.git@main
git+https://github.com/OP-TED/ted-rdf-conversion-pipeline.git@1.2.0-rc.2
elasticsearch~=8.6.2
currencyconverter~=0.17.6
pycountry~=22.3.5
Expand Down
1 change: 1 addition & 0 deletions ted_data_eu/adapters/nuts_processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,7 @@ def __init__(self, nuts_csv: io.StringIO):
}
)
self.dataframe[self.NUTS_PARENT_COLUMN_NAME] = self.dataframe[self.NUTS_PARENT_COLUMN_NAME].str.split("/").str[-1]
self.dataframe[self.NUTS_LABEL_COLUMN_NAME] = self.dataframe[self.NUTS_LABEL_COLUMN_NAME].str.partition(" ")[2]

def nuts_exists(self, nuts_code: str) -> bool:
"""
Expand Down
4 changes: 3 additions & 1 deletion ted_data_eu/services/etl_pipelines/ted_data_etl_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
END_DATE_METADATA_FIELD = "end_date"
TRIPLE_STORE_ENDPOINT = "notices"
TED_NOTICES_LINK = 'https://ted.europa.eu/udl?uri=TED:NOTICE:{notice_id}:TEXT:EN:HTML'
TRIPLE_STORE_ENDPOINT_FIELD = "triple_store_endpoint"

PROCEDURE_TYPE_COLUMN_NAME = "procedure_type"
WINNER_NUTS_COLUMN_NAME = "winner_nuts"
Expand Down Expand Up @@ -204,6 +205,7 @@ def extract(self) -> Dict:
"""
etl_metadata = self.get_metadata()
etl_metadata_fields = etl_metadata.keys()
triple_store_endpoint = etl_metadata[TRIPLE_STORE_ENDPOINT_FIELD] if TRIPLE_STORE_ENDPOINT_FIELD in etl_metadata_fields else TRIPLE_STORE_ENDPOINT
if START_DATE_METADATA_FIELD in etl_metadata_fields and END_DATE_METADATA_FIELD in etl_metadata_fields:
if START_DATE_METADATA_FIELD == END_DATE_METADATA_FIELD:
date_range = datetime.strptime(START_DATE_METADATA_FIELD, "\"%Y%m%d\"")
Expand All @@ -218,7 +220,7 @@ def extract(self) -> Dict:

sparql_query_template = Template(config.BQ_PATHS[SPARQL_QUERY_NAME].read_text(encoding='utf-8'))
sparql_query_str = sparql_query_template.substitute(date_range=date_range)
triple_store_endpoint = GraphDBAdapter().get_sparql_triple_store_endpoint(repository_name=TRIPLE_STORE_ENDPOINT)
triple_store_endpoint = GraphDBAdapter().get_sparql_triple_store_endpoint(repository_name=triple_store_endpoint)
result_table = triple_store_endpoint.with_query(sparql_query_str).fetch_tabular()
return {"data": result_table}

Expand Down
3 changes: 2 additions & 1 deletion tests/e2e/test_ted_etl.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
CONTRACT_VALUE_AVAILABLE_INDICATOR, PROCEDURE_TYPE_INDICATOR, PRODUCT_CODES_AVAILABLE_INDICATOR, LOT_NUTS_0, \
LOT_NUTS_1, LOT_NUTS_2, LOT_NUTS_3, get_country_name_by_code, BUYER_NUTS_COLUMN_NAME, PROCEDURE_ID_COLUMN_NAME, \
PROCEDURE_DESCRIPTION_COLUMN_NAME, PROCEDURE_COLUMN_NAME, TDA_FREE_INDEX_NAME, TDA_STARTER_INDEX_NAME, CPV_RANK_4, \
CPV_RANK_2, CPV_RANK_1, CPV_RANK_3, LOT_COUNTRY
CPV_RANK_2, CPV_RANK_1, CPV_RANK_3, LOT_COUNTRY, TRIPLE_STORE_ENDPOINT_FIELD


def test_get_country_name_by_code(real_country_code_alpha_2, fake_country_code_alpha_2, real_country_code_alpha_3,
Expand All @@ -20,6 +20,7 @@ def test_get_country_name_by_code(real_country_code_alpha_2, fake_country_code_a

def test_etl_pipeline(ted_data_etl_pipelines, etl_pipeline_config, graphdb_triple_store, example_notices,
tmp_repository_name):
etl_pipeline_config[TRIPLE_STORE_ENDPOINT_FIELD] = tmp_repository_name
for ted_data_etl_pipeline in ted_data_etl_pipelines:
graphdb_repositories = graphdb_triple_store.list_repositories()
if tmp_repository_name in graphdb_repositories:
Expand Down
4 changes: 3 additions & 1 deletion tests/unit/test_nuts_generation.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,4 +53,6 @@ def test_cellar_nuts_processor(real_nuts, fake_nuts, cellar_nuts_processor):
assert cellar_nuts_processor.get_nuts_level_by_code(nuts_code='FRK') == 1
assert cellar_nuts_processor.get_nuts_level_by_code(nuts_code='FR') == 0
assert cellar_nuts_processor.get_nuts_level_by_code(nuts_code='F') is None
assert cellar_nuts_processor.get_nuts_level_by_code(nuts_code=None) is None
assert cellar_nuts_processor.get_nuts_level_by_code(nuts_code=None) is None

assert cellar_nuts_processor.get_nuts_label_by_code(nuts_code='BE32') == 'Prov. Hainaut'

0 comments on commit 8e87190

Please sign in to comment.