Skip to content

Commit

Permalink
Merge pull request #392 from axif0/workflow_error
Browse files Browse the repository at this point in the history
added check for sparql and for json in workflow
  • Loading branch information
andrewtavis authored Oct 16, 2024
2 parents eef5b4c + 6f53411 commit 9767fee
Show file tree
Hide file tree
Showing 9 changed files with 61 additions and 17 deletions.
78 changes: 61 additions & 17 deletions src/scribe_data/check/check_project_structure.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,9 +72,65 @@
BASE_DIR = "../language_data_extraction"


def check_data_type_folders(path, language, subdir, errors):
    """
    Validate the contents of data type folders within a language directory.

    Every entry directly inside ``path`` is inspected. Loose files (other than
    '__init__.py') and directories that are not listed in DATA_TYPES are
    reported. For each recognised data type directory, the files inside it are
    checked against the allowed naming patterns.

    Args:
        path (str): The path to the directory containing data type folders.
        language (str): The name of the language being processed.
        subdir (str or None): The name of the sub-directory (for languages with
            sub-dialects), or None when the language has no sub-directories.
        errors (list): A list to which error messages will be appended in place.

    Returns:
        None. All findings are reported by appending strings to ``errors``.

    The following entries are accepted inside a data type folder:
        - Files starting with 'query_{data_type}' and ending with '.sparql'
        - A 'format_{data_type}.py' file
        - A '{data_type}_queried.json' file
        - An '__init__.py' file

    The 'emoji_keywords' data type folder is skipped entirely.
    Anything else found in a data type folder is reported as unexpected.
    """
    # Built with a conditional expression: f"{subdir}/" is never falsy, so an
    # `or ""` fallback would leave a literal "None/" in messages when there is
    # no sub-directory.
    error_subdir = f"{subdir}/" if subdir else ""

    # os.listdir returns entries in arbitrary order; sorting keeps the report
    # reproducible between runs and platforms.
    for item in sorted(os.listdir(path)):
        item_path = os.path.join(path, item)

        if os.path.isfile(item_path):
            if item != "__init__.py":
                errors.append(
                    f"Unexpected file found in {language}/{subdir or ''}: {item}"
                )
            continue

        if not os.path.isdir(item_path):
            continue

        if item not in DATA_TYPES:
            errors.append(
                f"Unexpected directory found in {language}/{subdir or ''}: {item}"
            )
            continue

        # Skip validation for emoji_keywords.
        if item == "emoji_keywords":
            continue

        # Exact file names allowed in this data type folder; SPARQL queries are
        # matched by prefix and suffix below because there may be several.
        allowed_names = {"__init__.py", f"format_{item}.py", f"{item}_queried.json"}
        query_prefix = f"query_{item}"

        for file in sorted(os.listdir(item_path)):
            is_query = file.startswith(query_prefix) and file.endswith(".sparql")
            if is_query or file in allowed_names:
                continue

            errors.append(
                f"Unexpected file in {language}/{error_subdir}{item}: {file}"
            )


def validate_project_structure():
"""
Validate that all directories follow the expected project structure and check for unexpected files and directories."""
Validate that all directories follow the expected project structure and check for unexpected files and directories.
Also validate SPARQL query file names in data_type folders and SUBDIRECTORIES.
"""
errors = []

if not os.path.exists(BASE_DIR):
Expand Down Expand Up @@ -129,22 +185,10 @@ def validate_project_structure():
for subdir in expected_subdirs:
subdir_path = os.path.join(language_path, subdir)
if os.path.exists(subdir_path):
for item in os.listdir(subdir_path):
item_path = os.path.join(subdir_path, item)
if os.path.isfile(item_path) and item != "__init__.py":
errors.append(
f"Unexpected file found in {language}/{subdir}: {item}"
)

elif os.path.isdir(item_path) and item not in DATA_TYPES:
errors.append(
f"Unexpected directory found in {language}/{subdir}: {item}"
)

elif unexpected_data_types := found_subdirs - DATA_TYPES:
errors.append(
f"Unexpected subdirectories in '{language}': {unexpected_data_types}"
)
check_data_type_folders(subdir_path, language, subdir, errors)

else:
check_data_type_folders(language_path, language, None, errors)

if errors:
print("Errors found:")
Expand Down

0 comments on commit 9767fee

Please sign in to comment.