From 594a5ac0c64e594b874da1beee8dd2482480f52e Mon Sep 17 00:00:00 2001
From: Akindele Michael <akindelemichael65@gmail.com>
Date: Tue, 15 Oct 2024 14:22:45 +0100
Subject: [PATCH 01/36] check for invalid language and data type QIDs

---
 .../check/check_query_identifiers.py          | 99 +++++++++++++++++++
 1 file changed, 99 insertions(+)
 create mode 100644 src/scribe_data/check/check_query_identifiers.py

diff --git a/src/scribe_data/check/check_query_identifiers.py b/src/scribe_data/check/check_query_identifiers.py
new file mode 100644
index 000000000..b379a5c86
--- /dev/null
+++ b/src/scribe_data/check/check_query_identifiers.py
@@ -0,0 +1,99 @@
+import re
+from pathlib import Path
+
+from scribe_data.cli.cli_utils import (
+    LANGUAGE_DATA_EXTRACTION_DIR,
+    language_metadata,
+    data_type_metadata,
+)
+
+
+def extract_qid_from_sparql(file_path: Path, pattern: str) -> str:
+    """
+    Return the bare QID (e.g. "Q150") from the first match of pattern, else None.
+    """
+    try:
+        with open(file_path, "r", encoding="utf-8") as file:
+            match = re.search(pattern, file.read())
+            if match:
+                # Keep only the text after "wd:", not the whole matched text.
+                return match.group(0).split("wd:")[-1]
+    except Exception as e:
+        print(f"Error reading {file_path}: {e}")
+    return None
+
+
+def check_queries():
+    """Print every SPARQL query file whose language or data type QID is unexpected."""
+    language_pattern = r"\?lexeme dct:language wd:Q\d+"
+    data_type_pattern = r"wikibase:lexicalCategory wd:Q\d+"
+    bad_language_files = []
+    bad_data_type_files = []
+
+    for sparql_path in LANGUAGE_DATA_EXTRACTION_DIR.glob("**/*.sparql"):
+        found_lang_qid = extract_qid_from_sparql(sparql_path, language_pattern)
+        found_type_qid = extract_qid_from_sparql(sparql_path, data_type_pattern)
+
+        # Both validators always run, so one file can appear in both reports.
+        if not is_valid_language(sparql_path, found_lang_qid):
+            bad_language_files.append(sparql_path)
+        if not is_valid_data_type(sparql_path, found_type_qid):
+            bad_data_type_files.append(sparql_path)
+
+    reports = (
+        ("Queries with incorrect languages QIDs are:", bad_language_files),
+        ("Queries with incorrect data type QIDs are:", bad_data_type_files),
+    )
+    for heading, flagged in reports:
+        if flagged:
+            print(heading)
+            for sparql_path in flagged:
+                print(f"- {sparql_path}")
+
+
+def is_valid_language(query_file, lang_qid):
+    """Check lang_qid against the metadata QID for the query's language directory."""
+    lang_directory_name = query_file.parent.parent.name.lower()
+    language_entry = None
+    for candidate in language_metadata.get("languages"):  # metadata may be incomplete
+        if candidate["language"] == lang_directory_name:
+            language_entry = candidate
+            break
+
+    if not language_entry:
+        print(
+            f"Warning: Language '{lang_directory_name}' not found in language_metadata.json."
+        )
+        return False
+
+    expected_language_qid = language_entry["qid"]
+    print("Expected language QID:", expected_language_qid)
+    if lang_qid == expected_language_qid:
+        return True
+
+    print(
+        f"Incorrect language QID in {lang_directory_name}. "
+        f"Found: {lang_qid}, Expected: {expected_language_qid}"
+    )
+    return False
+
+
+def is_valid_data_type(query_file, data_type_qid):
+    """Check data_type_qid against the metadata value for the query's parent directory."""
+    # The data type key is the directory holding the query, e.g. "nouns" or "verbs".
+    expected_data_type_qid = data_type_metadata.get(query_file.parent.name)
+    if data_type_qid == expected_data_type_qid:
+        return True
+    print(
+        f"Warning: Incorrect data type QID in {query_file}. Found: {data_type_qid}, Expected: {expected_data_type_qid}"
+    )
+    return False
+
+
+# Examples (manual checks, kept commented out):
+
+# file_path = Path("French/verbs/query_verbs.sparql")
+# print(is_valid_data_type(file_path, "QW24907"))  # data type check
+# print(is_valid_language(file_path, "Q150"))  # language check
+
+check_queries()  # NOTE(review): also runs on import; consider a __main__ guard.

From defab4d33374bf47ee3b63ce335f14d29c06f5bc Mon Sep 17 00:00:00 2001
From: Angel osim <69635048+Otom-obhazi@users.noreply.github.com>
Date: Tue, 15 Oct 2024 12:54:33 +0100
Subject: [PATCH 02/36] Create query_adverbs.sparql

adverbs for yoruba
---
 .../Yoruba/adverbs/query_adverbs.sparql             | 13 +++++++++++++
 1 file changed, 13 insertions(+)
 create mode 100644 src/scribe_data/language_data_extraction/Yoruba/adverbs/query_adverbs.sparql

diff --git a/src/scribe_data/language_data_extraction/Yoruba/adverbs/query_adverbs.sparql b/src/scribe_data/language_data_extraction/Yoruba/adverbs/query_adverbs.sparql
new file mode 100644
index 000000000..38387bde2
--- /dev/null
+++ b/src/scribe_data/language_data_extraction/Yoruba/adverbs/query_adverbs.sparql
@@ -0,0 +1,13 @@
+# tool: scribe-data
+# All Yoruba (Q34311) adverbs.
+# Enter this query at https://query.wikidata.org/.
+
+SELECT DISTINCT
+  (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") AS ?lexemeID)
+  ?adverb
+
+WHERE {
+  ?lexeme dct:language wd:Q34311 ;
+    wikibase:lexicalCategory wd:Q380057 ;
+    wikibase:lemma ?adverb .
+}

From 662a0f6f4be9a33d433a964d375cd4b11b7f70cc Mon Sep 17 00:00:00 2001
From: Andrew Tavis McAllister <andrew.t.mcallister@gmail.com>
Date: Tue, 15 Oct 2024 18:16:01 +0200
Subject: [PATCH 03/36] Remove select distinct from all queries

---
 .../Arabic/adjectives/query_adjectives.sparql                   | 2 +-
 .../language_data_extraction/Arabic/nouns/query_nouns.sparql    | 2 +-
 .../language_data_extraction/Basque/nouns/query_nouns.sparql    | 2 +-
 .../language_data_extraction/Basque/verbs/query_verbs.sparql    | 2 +-
 .../language_data_extraction/Bengali/nouns/query_nouns.sparql   | 2 +-
 .../Chinese/Mandarin/nouns/query_nouns.sparql                   | 2 +-
 .../language_data_extraction/Czech/nouns/query_nouns.sparql     | 2 +-
 .../Danish/adjectives/query_adjectives_1.sparql                 | 2 +-
 .../Danish/adjectives/query_adjectives_2.sparql                 | 2 +-
 .../Danish/adjectives/query_adjectives_3.sparql                 | 2 +-
 .../Danish/adverbs/query_adverbs.sparql                         | 2 +-
 .../language_data_extraction/Danish/nouns/query_nouns.sparql    | 2 +-
 .../language_data_extraction/English/nouns/query_nouns.sparql   | 2 +-
 .../language_data_extraction/English/verbs/query_verbs.sparql   | 2 +-
 .../Esperanto/adjectives/query_adjectives.sparql                | 2 +-
 .../Esperanto/adverbs/query_adverbs.sparql                      | 2 +-
 .../language_data_extraction/Esperanto/nouns/query_nouns.sparql | 2 +-
 .../Esperanto/personal_pronouns/query_personal_pronouns.sparql  | 2 +-
 .../language_data_extraction/Esperanto/verbs/query_verbs.sparql | 2 +-
 .../Estonian/adverbs/query_adverbs_1.sparql                     | 2 +-
 .../Estonian/adverbs/query_adverbs_2.sparql                     | 2 +-
 .../language_data_extraction/Estonian/nouns/query_nouns.sparql  | 2 +-
 .../language_data_extraction/Estonian/verbs/query_verbs.sparql  | 2 +-
 .../language_data_extraction/Finnish/nouns/query_nouns.sparql   | 2 +-
 .../language_data_extraction/French/nouns/query_nouns.sparql    | 2 +-
 .../language_data_extraction/German/nouns/query_nouns.sparql    | 2 +-
 .../language_data_extraction/German/verbs/query_verbs_1.sparql  | 2 +-
 .../language_data_extraction/German/verbs/query_verbs_2.sparql  | 2 +-
 .../language_data_extraction/Greek/nouns/query_nouns.sparql     | 2 +-
 .../language_data_extraction/Greek/verbs/query_verbs.sparql     | 2 +-
 .../language_data_extraction/Hausa/nouns/query_nouns.sparql     | 2 +-
 .../language_data_extraction/Hebrew/nouns/query_nouns.sparql    | 2 +-
 .../language_data_extraction/Hebrew/verbs/query_verbs_1.sparql  | 2 +-
 .../language_data_extraction/Hebrew/verbs/query_verbs_2.sparql  | 2 +-
 .../language_data_extraction/Hebrew/verbs/query_verbs_3.sparql  | 2 +-
 .../language_data_extraction/Hebrew/verbs/query_verbs_4.sparql  | 2 +-
 .../Hindustani/Hindi/adjectives/query_adjectives.sparql         | 2 +-
 .../Hindustani/Hindi/adverbs/query_adverbs.sparql               | 2 +-
 .../Hindustani/Hindi/nouns/query_nouns.sparql                   | 2 +-
 .../Hindustani/Hindi/postpositions/query_postpositions.sparql   | 2 +-
 .../Hindustani/Hindi/prepositions/query_prepositions.sparql     | 2 +-
 .../Hindustani/Hindi/verbs/query_verbs.sparql                   | 2 +-
 .../Hindustani/Urdu/adjectives/query_adjectives.sparql          | 2 +-
 .../Hindustani/Urdu/adverbs/query_adverbs.sparql                | 2 +-
 .../Hindustani/Urdu/nouns/query_nouns.sparql                    | 2 +-
 .../Hindustani/Urdu/postpositions/query_postpositions.sparql    | 2 +-
 .../Hindustani/Urdu/prepositions/query_prepositions.sparql      | 2 +-
 .../Hindustani/Urdu/verbs/query_verbs.sparql                    | 2 +-
 .../Indonesian/nouns/query_nouns.sparql                         | 2 +-
 .../language_data_extraction/Italian/nouns/query_nouns.sparql   | 2 +-
 .../language_data_extraction/Japanese/nouns/query_nouns.sparql  | 2 +-
 .../Korean/adverbs/query_adverbs.sparql                         | 2 +-
 .../Korean/postposition/query_postpositions.sparql              | 2 +-
 .../language_data_extraction/Kurmanji/nouns/query_nouns.sparql  | 2 +-
 .../language_data_extraction/Malay/nouns/query_nouns.sparql     | 2 +-
 .../Malayalam/adjectives/query_adjectives.sparql                | 2 +-
 .../Malayalam/adverbs/query_adverbs.sparql                      | 2 +-
 .../language_data_extraction/Malayalam/nouns/query_nouns.sparql | 2 +-
 .../Malayalam/prepositions/query_prepositions.sparql            | 2 +-
 .../language_data_extraction/Malayalam/verbs/query_verbs.sparql | 2 +-
 .../Norwegian/Bokm\303\245l/nouns/query_nouns.sparql"           | 2 +-
 .../Norwegian/Bokm\303\245l/verbs/query_verbs.sparql"           | 2 +-
 .../Norwegian/Nynorsk/nouns/query_nouns.sparql                  | 2 +-
 .../Norwegian/Nynorsk/verbs/query_verbs.sparql                  | 2 +-
 .../Pidgin/Nigerian/adverbs/query_adverbs.sparql                | 2 +-
 .../Pidgin/Nigerian/nouns/query_nouns.sparql                    | 2 +-
 .../language_data_extraction/Polish/nouns/query_nouns.sparql    | 2 +-
 .../Portuguese/nouns/query_nouns.sparql                         | 2 +-
 .../Portuguese/verbs/query_verbs.sparql                         | 2 +-
 .../Punjabi/Gurmukhi/nouns/query_nouns.sparql                   | 2 +-
 .../Punjabi/Gurmukhi/verbs/query_verbs.sparql                   | 2 +-
 .../Punjabi/Shahmukhi/nouns/query_nouns.sparql                  | 2 +-
 .../Punjabi/Shahmukhi/verbs/query_verbs.sparql                  | 2 +-
 .../language_data_extraction/Russian/nouns/query_nouns.sparql   | 2 +-
 .../language_data_extraction/Russian/verbs/query_verbs.sparql   | 2 +-
 .../Slovak/adverbs/query_adverbs.sparql                         | 2 +-
 .../language_data_extraction/Slovak/nouns/query_nouns.sparql    | 2 +-
 .../language_data_extraction/Spanish/nouns/query_nouns.sparql   | 2 +-
 .../language_data_extraction/Swahili/nouns/query_nouns.sparql   | 2 +-
 .../language_data_extraction/Swedish/nouns/query_nouns.sparql   | 2 +-
 .../language_data_extraction/Tajik/nouns/query_nouns.sparql     | 2 +-
 .../language_data_extraction/Tamil/nouns/query_nouns.sparql     | 2 +-
 .../language_data_extraction/Ukrainian/nouns/query_nouns.sparql | 2 +-
 .../Yoruba/adverbs/query_adverbs.sparql                         | 2 +-
 .../language_data_extraction/Yoruba/nouns/query_nouns.sparql    | 2 +-
 85 files changed, 85 insertions(+), 85 deletions(-)

diff --git a/src/scribe_data/language_data_extraction/Arabic/adjectives/query_adjectives.sparql b/src/scribe_data/language_data_extraction/Arabic/adjectives/query_adjectives.sparql
index ae7b2b1a4..0f9851c8d 100644
--- a/src/scribe_data/language_data_extraction/Arabic/adjectives/query_adjectives.sparql
+++ b/src/scribe_data/language_data_extraction/Arabic/adjectives/query_adjectives.sparql
@@ -2,7 +2,7 @@
 # All Arabic (Q13955) adjectives.
 # Enter this query at https://query.wikidata.org/.
 
-SELECT DISTINCT
+SELECT
   (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") AS ?lexemeID)
   ?adjective
   ?femSingularNominativeIndef
diff --git a/src/scribe_data/language_data_extraction/Arabic/nouns/query_nouns.sparql b/src/scribe_data/language_data_extraction/Arabic/nouns/query_nouns.sparql
index e18ebcd83..56e8b42c1 100644
--- a/src/scribe_data/language_data_extraction/Arabic/nouns/query_nouns.sparql
+++ b/src/scribe_data/language_data_extraction/Arabic/nouns/query_nouns.sparql
@@ -2,7 +2,7 @@
 # All Arabic (Q13955) nouns.
 # Enter this query at https://query.wikidata.org/.
 
-SELECT DISTINCT
+SELECT
   (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") AS ?lexemeID)
   ?noun
 
diff --git a/src/scribe_data/language_data_extraction/Basque/nouns/query_nouns.sparql b/src/scribe_data/language_data_extraction/Basque/nouns/query_nouns.sparql
index 198959aec..19314c81a 100644
--- a/src/scribe_data/language_data_extraction/Basque/nouns/query_nouns.sparql
+++ b/src/scribe_data/language_data_extraction/Basque/nouns/query_nouns.sparql
@@ -2,7 +2,7 @@
 # All Basque (Q8752) nouns and all implemented singular and plural forms.
 # Enter this query at https://query.wikidata.org/.
 
-SELECT DISTINCT
+SELECT
   (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") AS ?lexemeID)
   ?absIndefinite
   ?absSingular
diff --git a/src/scribe_data/language_data_extraction/Basque/verbs/query_verbs.sparql b/src/scribe_data/language_data_extraction/Basque/verbs/query_verbs.sparql
index 5c699ce2c..4bb8792b2 100644
--- a/src/scribe_data/language_data_extraction/Basque/verbs/query_verbs.sparql
+++ b/src/scribe_data/language_data_extraction/Basque/verbs/query_verbs.sparql
@@ -2,7 +2,7 @@
 # All Basque (Q8752) verbs and the currently implemented tenses for each.
 # Enter this query at https://query.wikidata.org/.
 
-SELECT DISTINCT
+SELECT
   (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") AS ?lexemeID)
   ?infinitive
   ?future
diff --git a/src/scribe_data/language_data_extraction/Bengali/nouns/query_nouns.sparql b/src/scribe_data/language_data_extraction/Bengali/nouns/query_nouns.sparql
index c8a1a1dd5..dc36759e7 100644
--- a/src/scribe_data/language_data_extraction/Bengali/nouns/query_nouns.sparql
+++ b/src/scribe_data/language_data_extraction/Bengali/nouns/query_nouns.sparql
@@ -2,7 +2,7 @@
 # All Bengali (Bangla Q9610) nouns and their forms in the various cases.
 # Enter this query at https://query.wikidata.org/.
 
-SELECT DISTINCT
+SELECT
   (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") AS ?lexemeID)
   ?nominative
   ?genitive
diff --git a/src/scribe_data/language_data_extraction/Chinese/Mandarin/nouns/query_nouns.sparql b/src/scribe_data/language_data_extraction/Chinese/Mandarin/nouns/query_nouns.sparql
index 1d672c3c2..f3badc28e 100644
--- a/src/scribe_data/language_data_extraction/Chinese/Mandarin/nouns/query_nouns.sparql
+++ b/src/scribe_data/language_data_extraction/Chinese/Mandarin/nouns/query_nouns.sparql
@@ -2,7 +2,7 @@
 # All Standard Mandarin Chinese (Q727694) nouns.
 # Enter this query at https://query.wikidata.org/.
 
-SELECT DISTINCT
+SELECT
   (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") AS ?lexemeID)
   ?noun
 
diff --git a/src/scribe_data/language_data_extraction/Czech/nouns/query_nouns.sparql b/src/scribe_data/language_data_extraction/Czech/nouns/query_nouns.sparql
index d8456e98b..77cbb579d 100644
--- a/src/scribe_data/language_data_extraction/Czech/nouns/query_nouns.sparql
+++ b/src/scribe_data/language_data_extraction/Czech/nouns/query_nouns.sparql
@@ -2,7 +2,7 @@
 # All Czeck (Q9056) nouns, their plurals and their genders.
 # Enter this query at https://query.wikidata.org/.
 
-SELECT DISTINCT
+SELECT
   (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") AS ?lexemeID)
   ?nomSingular
   ?nomPlural
diff --git a/src/scribe_data/language_data_extraction/Danish/adjectives/query_adjectives_1.sparql b/src/scribe_data/language_data_extraction/Danish/adjectives/query_adjectives_1.sparql
index f08070804..e310ea383 100644
--- a/src/scribe_data/language_data_extraction/Danish/adjectives/query_adjectives_1.sparql
+++ b/src/scribe_data/language_data_extraction/Danish/adjectives/query_adjectives_1.sparql
@@ -2,7 +2,7 @@
 # All Danish (Q9035) adjectives and some of the available forms.
 # Enter this query at https://query.wikidata.org/.
 
-SELECT DISTINCT
+SELECT
   (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") AS ?lexemeID)
   ?adjective
   ?commonSingularIndefinite
diff --git a/src/scribe_data/language_data_extraction/Danish/adjectives/query_adjectives_2.sparql b/src/scribe_data/language_data_extraction/Danish/adjectives/query_adjectives_2.sparql
index b4eb71462..508b65120 100644
--- a/src/scribe_data/language_data_extraction/Danish/adjectives/query_adjectives_2.sparql
+++ b/src/scribe_data/language_data_extraction/Danish/adjectives/query_adjectives_2.sparql
@@ -2,7 +2,7 @@
 # All Danish (Q9035) adjectives and some of the available forms.
 # Enter this query at https://query.wikidata.org/.
 
-SELECT DISTINCT
+SELECT
   (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") AS ?lexemeID)
   ?adjective
   ?pluralPositive
diff --git a/src/scribe_data/language_data_extraction/Danish/adjectives/query_adjectives_3.sparql b/src/scribe_data/language_data_extraction/Danish/adjectives/query_adjectives_3.sparql
index 6d283ead8..eddb0dacb 100644
--- a/src/scribe_data/language_data_extraction/Danish/adjectives/query_adjectives_3.sparql
+++ b/src/scribe_data/language_data_extraction/Danish/adjectives/query_adjectives_3.sparql
@@ -2,7 +2,7 @@
 # All Danish (Q9035) adjectives and some of the available forms.
 # Enter this query at https://query.wikidata.org/.
 
-SELECT DISTINCT
+SELECT
   (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") AS ?lexemeID)
   ?adjective
   ?indefiniteSuperlative
diff --git a/src/scribe_data/language_data_extraction/Danish/adverbs/query_adverbs.sparql b/src/scribe_data/language_data_extraction/Danish/adverbs/query_adverbs.sparql
index 51bb8caae..177735c7a 100644
--- a/src/scribe_data/language_data_extraction/Danish/adverbs/query_adverbs.sparql
+++ b/src/scribe_data/language_data_extraction/Danish/adverbs/query_adverbs.sparql
@@ -2,7 +2,7 @@
 # All Danish (Q9035) adverbs.
 # Enter this query at https://query.wikidata.org/.
 
-SELECT DISTINCT
+SELECT
   (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") AS ?lexemeID)
   ?adverb
 
diff --git a/src/scribe_data/language_data_extraction/Danish/nouns/query_nouns.sparql b/src/scribe_data/language_data_extraction/Danish/nouns/query_nouns.sparql
index f93c9715f..dae3b9b66 100644
--- a/src/scribe_data/language_data_extraction/Danish/nouns/query_nouns.sparql
+++ b/src/scribe_data/language_data_extraction/Danish/nouns/query_nouns.sparql
@@ -2,7 +2,7 @@
 # All Danish (Q9035) nouns, their plurals and their genders.
 # Enter this query at https://query.wikidata.org/.
 
-SELECT DISTINCT
+SELECT
   (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") AS ?lexemeID)
   ?singular
   ?plural
diff --git a/src/scribe_data/language_data_extraction/English/nouns/query_nouns.sparql b/src/scribe_data/language_data_extraction/English/nouns/query_nouns.sparql
index a740d1d8f..e60883fbe 100644
--- a/src/scribe_data/language_data_extraction/English/nouns/query_nouns.sparql
+++ b/src/scribe_data/language_data_extraction/English/nouns/query_nouns.sparql
@@ -2,7 +2,7 @@
 # All English (Q1860) nouns and their plural.
 # Enter this query at https://query.wikidata.org/.
 
-SELECT DISTINCT
+SELECT
   (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") AS ?lexemeID)
   ?singular
   ?plural
diff --git a/src/scribe_data/language_data_extraction/English/verbs/query_verbs.sparql b/src/scribe_data/language_data_extraction/English/verbs/query_verbs.sparql
index 15228d792..ee09c6f00 100644
--- a/src/scribe_data/language_data_extraction/English/verbs/query_verbs.sparql
+++ b/src/scribe_data/language_data_extraction/English/verbs/query_verbs.sparql
@@ -2,7 +2,7 @@
 # All English (Q1860) verbs and the currently implemented tenses for each.
 # Enter this query at https://query.wikidata.org/.
 
-SELECT DISTINCT
+SELECT
   (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") AS ?lexemeID)
   ?infinitive
   ?presSimp
diff --git a/src/scribe_data/language_data_extraction/Esperanto/adjectives/query_adjectives.sparql b/src/scribe_data/language_data_extraction/Esperanto/adjectives/query_adjectives.sparql
index ec51925c5..e85a304cb 100644
--- a/src/scribe_data/language_data_extraction/Esperanto/adjectives/query_adjectives.sparql
+++ b/src/scribe_data/language_data_extraction/Esperanto/adjectives/query_adjectives.sparql
@@ -2,7 +2,7 @@
 # All Esperanto (Q143) adjectives.
 # Enter this query at https://query.wikidata.org/.
 
-SELECT DISTINCT
+SELECT
   (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") AS ?lexemeID)
   ?adjective
 
diff --git a/src/scribe_data/language_data_extraction/Esperanto/adverbs/query_adverbs.sparql b/src/scribe_data/language_data_extraction/Esperanto/adverbs/query_adverbs.sparql
index 27892bf6b..41367afa6 100644
--- a/src/scribe_data/language_data_extraction/Esperanto/adverbs/query_adverbs.sparql
+++ b/src/scribe_data/language_data_extraction/Esperanto/adverbs/query_adverbs.sparql
@@ -2,7 +2,7 @@
 # All Esperanto (Q143) adverbs.
 # Enter this query at https://query.wikidata.org/.
 
-SELECT DISTINCT
+SELECT
   (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") AS ?lexemeID)
   ?adverb
 
diff --git a/src/scribe_data/language_data_extraction/Esperanto/nouns/query_nouns.sparql b/src/scribe_data/language_data_extraction/Esperanto/nouns/query_nouns.sparql
index 4074b4280..9271cdfbe 100644
--- a/src/scribe_data/language_data_extraction/Esperanto/nouns/query_nouns.sparql
+++ b/src/scribe_data/language_data_extraction/Esperanto/nouns/query_nouns.sparql
@@ -2,7 +2,7 @@
 # All Esperanto (Q143) nouns and their plurals for the given cases.
 # Enter this query at https://query.wikidata.org/.
 
-SELECT DISTINCT
+SELECT
   (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") AS ?lexemeID)
   ?nomSingular
   ?accSingular
diff --git a/src/scribe_data/language_data_extraction/Esperanto/personal_pronouns/query_personal_pronouns.sparql b/src/scribe_data/language_data_extraction/Esperanto/personal_pronouns/query_personal_pronouns.sparql
index 007f374ad..e4d9281f7 100644
--- a/src/scribe_data/language_data_extraction/Esperanto/personal_pronouns/query_personal_pronouns.sparql
+++ b/src/scribe_data/language_data_extraction/Esperanto/personal_pronouns/query_personal_pronouns.sparql
@@ -2,7 +2,7 @@
 # All Esperanto (Q143) personal pronouns.
 # Enter this query at https://query.wikidata.org/.
 
-SELECT DISTINCT
+SELECT
   (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") AS ?lexemeID)
   ?personalPronouns
 
diff --git a/src/scribe_data/language_data_extraction/Esperanto/verbs/query_verbs.sparql b/src/scribe_data/language_data_extraction/Esperanto/verbs/query_verbs.sparql
index e08cf8748..074006a84 100644
--- a/src/scribe_data/language_data_extraction/Esperanto/verbs/query_verbs.sparql
+++ b/src/scribe_data/language_data_extraction/Esperanto/verbs/query_verbs.sparql
@@ -2,7 +2,7 @@
 # All Esperanto (Q143) verbs and the currently implemented tenses for each.
 # Enter this query at https://query.wikidata.org/.
 
-SELECT DISTINCT
+SELECT
   (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") AS ?lexemeID)
   ?infinitive
   ?presIndicative
diff --git a/src/scribe_data/language_data_extraction/Estonian/adverbs/query_adverbs_1.sparql b/src/scribe_data/language_data_extraction/Estonian/adverbs/query_adverbs_1.sparql
index 905f25945..a251d58c1 100644
--- a/src/scribe_data/language_data_extraction/Estonian/adverbs/query_adverbs_1.sparql
+++ b/src/scribe_data/language_data_extraction/Estonian/adverbs/query_adverbs_1.sparql
@@ -2,7 +2,7 @@
 # All Estonian (Q380057) adverbs and the corresponding forms per case.
 # Enter this query at https://query.wikidata.org/
 
-SELECT DISTINCT
+SELECT
   (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") AS ?lexemeID)
   ?adverb
   ?nominativeSingular
diff --git a/src/scribe_data/language_data_extraction/Estonian/adverbs/query_adverbs_2.sparql b/src/scribe_data/language_data_extraction/Estonian/adverbs/query_adverbs_2.sparql
index 4aa682c1e..48119a3b5 100644
--- a/src/scribe_data/language_data_extraction/Estonian/adverbs/query_adverbs_2.sparql
+++ b/src/scribe_data/language_data_extraction/Estonian/adverbs/query_adverbs_2.sparql
@@ -2,7 +2,7 @@
 # All Estonian (Q380057) adverbs and the corresponding forms per case.
 # Enter this query at https://query.wikidata.org/
 
-SELECT DISTINCT
+SELECT
   (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") AS ?lexemeID)
   ?adverb
   ?adessiveSingular
diff --git a/src/scribe_data/language_data_extraction/Estonian/nouns/query_nouns.sparql b/src/scribe_data/language_data_extraction/Estonian/nouns/query_nouns.sparql
index e46a8e378..011f0b946 100644
--- a/src/scribe_data/language_data_extraction/Estonian/nouns/query_nouns.sparql
+++ b/src/scribe_data/language_data_extraction/Estonian/nouns/query_nouns.sparql
@@ -2,7 +2,7 @@
 # All Estonian (Q9072) nouns and their plural.
 # Enter this query at https://query.wikidata.org/.
 
-SELECT DISTINCT
+SELECT
   (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") AS ?lexemeID)
   ?singular
   ?plural
diff --git a/src/scribe_data/language_data_extraction/Estonian/verbs/query_verbs.sparql b/src/scribe_data/language_data_extraction/Estonian/verbs/query_verbs.sparql
index 7db9134ab..933685fc2 100644
--- a/src/scribe_data/language_data_extraction/Estonian/verbs/query_verbs.sparql
+++ b/src/scribe_data/language_data_extraction/Estonian/verbs/query_verbs.sparql
@@ -2,7 +2,7 @@
 # All Estonian (Q9072) verbs and the currently implemented tenses for each.
 # Enter this query at https://query.wikidata.org/
 
-SELECT DISTINCT
+SELECT
   (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") AS ?lexemeID)
   ?verb
 
diff --git a/src/scribe_data/language_data_extraction/Finnish/nouns/query_nouns.sparql b/src/scribe_data/language_data_extraction/Finnish/nouns/query_nouns.sparql
index 4e781d3b4..f11c4a097 100644
--- a/src/scribe_data/language_data_extraction/Finnish/nouns/query_nouns.sparql
+++ b/src/scribe_data/language_data_extraction/Finnish/nouns/query_nouns.sparql
@@ -2,7 +2,7 @@
 # All Finnish (Q1412) nouns and their plural for the given cases.
 # Enter this query at https://query.wikidata.org/.
 
-SELECT DISTINCT
+SELECT
   (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") AS ?lexemeID)
   ?nomSingular
   ?nomPlural
diff --git a/src/scribe_data/language_data_extraction/French/nouns/query_nouns.sparql b/src/scribe_data/language_data_extraction/French/nouns/query_nouns.sparql
index 12a992c69..32653659a 100644
--- a/src/scribe_data/language_data_extraction/French/nouns/query_nouns.sparql
+++ b/src/scribe_data/language_data_extraction/French/nouns/query_nouns.sparql
@@ -2,7 +2,7 @@
 # All French (Q150) nouns, their plurals and their genders.
 # Enter this query at https://query.wikidata.org/.
 
-SELECT DISTINCT
+SELECT
   (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") AS ?lexemeID)
   ?singular
   ?plural
diff --git a/src/scribe_data/language_data_extraction/German/nouns/query_nouns.sparql b/src/scribe_data/language_data_extraction/German/nouns/query_nouns.sparql
index 9c835843d..bda5d2b30 100644
--- a/src/scribe_data/language_data_extraction/German/nouns/query_nouns.sparql
+++ b/src/scribe_data/language_data_extraction/German/nouns/query_nouns.sparql
@@ -2,7 +2,7 @@
 # All German (Q188) nouns, their plurals and their genders in the given cases.
 # Enter this query at https://query.wikidata.org/.
 
-SELECT DISTINCT
+SELECT
   (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") AS ?lexemeID)
   ?nomSingular
   ?nomPlural
diff --git a/src/scribe_data/language_data_extraction/German/verbs/query_verbs_1.sparql b/src/scribe_data/language_data_extraction/German/verbs/query_verbs_1.sparql
index c8b64df6b..e255fb7bc 100644
--- a/src/scribe_data/language_data_extraction/German/verbs/query_verbs_1.sparql
+++ b/src/scribe_data/language_data_extraction/German/verbs/query_verbs_1.sparql
@@ -2,7 +2,7 @@
 # All German (Q188) verbs and a portion of the currently implemented tenses for each.
 # Enter this query at https://query.wikidata.org/.
 
-# Not SELECT DISTINCT as we want to get verbs with both sein and haben as auxiliaries
+# No DISTINCT, as we want to get verbs with both sein and haben as auxiliaries
 SELECT
   (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") AS ?lexemeID)
   ?infinitive
diff --git a/src/scribe_data/language_data_extraction/German/verbs/query_verbs_2.sparql b/src/scribe_data/language_data_extraction/German/verbs/query_verbs_2.sparql
index 98d6b718b..e209dcc48 100644
--- a/src/scribe_data/language_data_extraction/German/verbs/query_verbs_2.sparql
+++ b/src/scribe_data/language_data_extraction/German/verbs/query_verbs_2.sparql
@@ -2,7 +2,7 @@
 # All German (Q188) verbs and a portion of the currently implemented tenses for each.
 # Enter this query at https://query.wikidata.org/.
 
-# Not SELECT DISTINCT as we want to get verbs with both sein and haben as auxiliaries
+# No DISTINCT, as we want to get verbs with both sein and haben as auxiliaries
 SELECT
   (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") AS ?lexemeID)
   ?infinitive ?pastParticiple ?auxiliaryVerb
diff --git a/src/scribe_data/language_data_extraction/Greek/nouns/query_nouns.sparql b/src/scribe_data/language_data_extraction/Greek/nouns/query_nouns.sparql
index f1e2d8a01..ca48a52ff 100644
--- a/src/scribe_data/language_data_extraction/Greek/nouns/query_nouns.sparql
+++ b/src/scribe_data/language_data_extraction/Greek/nouns/query_nouns.sparql
@@ -2,7 +2,7 @@
 # All Greek (Q36510) nouns, their plurals and their genders.
 # Enter this query at https://query.wikidata.org/.
 
-SELECT DISTINCT
+SELECT
   (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") AS ?lexemeID)
   ?singular
   ?plural
diff --git a/src/scribe_data/language_data_extraction/Greek/verbs/query_verbs.sparql b/src/scribe_data/language_data_extraction/Greek/verbs/query_verbs.sparql
index 51811421a..ea669818d 100644
--- a/src/scribe_data/language_data_extraction/Greek/verbs/query_verbs.sparql
+++ b/src/scribe_data/language_data_extraction/Greek/verbs/query_verbs.sparql
@@ -2,7 +2,7 @@
 # All Greek (Q36510) verb snd the currently implemented tenses for each.
 # Enter this query at https://query.wikidata.org/.
 
-SELECT DISTINCT
+SELECT
   (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") AS ?lexemeID)
   ?infinitive
   ?presFPS ?presSPS ?presTPS
diff --git a/src/scribe_data/language_data_extraction/Hausa/nouns/query_nouns.sparql b/src/scribe_data/language_data_extraction/Hausa/nouns/query_nouns.sparql
index 6734402bd..84800a22e 100644
--- a/src/scribe_data/language_data_extraction/Hausa/nouns/query_nouns.sparql
+++ b/src/scribe_data/language_data_extraction/Hausa/nouns/query_nouns.sparql
@@ -2,7 +2,7 @@
 # All Hausa (Q56475) nouns and their gender.
 # Enter this query at https://query.wikidata.org/.
 
-SELECT DISTINCT
+SELECT
   (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") AS ?lexemeID)
   ?singular
   ?plural
diff --git a/src/scribe_data/language_data_extraction/Hebrew/nouns/query_nouns.sparql b/src/scribe_data/language_data_extraction/Hebrew/nouns/query_nouns.sparql
index 41773856c..093cea32a 100644
--- a/src/scribe_data/language_data_extraction/Hebrew/nouns/query_nouns.sparql
+++ b/src/scribe_data/language_data_extraction/Hebrew/nouns/query_nouns.sparql
@@ -2,7 +2,7 @@
 # All Hebrew (Q9288) nouns, their plurals and their genders.
 # Enter this query at https://query.wikidata.org/.
 
-SELECT DISTINCT
+SELECT
   (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") AS ?lexemeID)
   ?noun
   ?plural
diff --git a/src/scribe_data/language_data_extraction/Hebrew/verbs/query_verbs_1.sparql b/src/scribe_data/language_data_extraction/Hebrew/verbs/query_verbs_1.sparql
index d922b978a..14c361444 100644
--- a/src/scribe_data/language_data_extraction/Hebrew/verbs/query_verbs_1.sparql
+++ b/src/scribe_data/language_data_extraction/Hebrew/verbs/query_verbs_1.sparql
@@ -2,7 +2,7 @@
 # All Hebrew (Q9288) verbs and the currently implemented tenses for each.
 # Enter this query at https://query.wikidata.org/.
 
-SELECT DISTINCT
+SELECT
   (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") AS ?lexemeID)
   ?infinitive
   ?presSF ?presSM ?presPF ?presPM
diff --git a/src/scribe_data/language_data_extraction/Hebrew/verbs/query_verbs_2.sparql b/src/scribe_data/language_data_extraction/Hebrew/verbs/query_verbs_2.sparql
index bb4793004..fefb8547d 100644
--- a/src/scribe_data/language_data_extraction/Hebrew/verbs/query_verbs_2.sparql
+++ b/src/scribe_data/language_data_extraction/Hebrew/verbs/query_verbs_2.sparql
@@ -2,7 +2,7 @@
 # All Hebrew (Q9288) verbs and the currently implemented tenses for each.
 # Enter this query at https://query.wikidata.org/.
 
-SELECT DISTINCT
+SELECT
   (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") AS ?lexemeID)
   ?infinitive
   ?impSPSF ?impSPSM ?impSPPF ?impSPPM
diff --git a/src/scribe_data/language_data_extraction/Hebrew/verbs/query_verbs_3.sparql b/src/scribe_data/language_data_extraction/Hebrew/verbs/query_verbs_3.sparql
index b39eea963..e38e2025d 100644
--- a/src/scribe_data/language_data_extraction/Hebrew/verbs/query_verbs_3.sparql
+++ b/src/scribe_data/language_data_extraction/Hebrew/verbs/query_verbs_3.sparql
@@ -2,7 +2,7 @@
 # All Hebrew (Q9288) verbs and the currently implemented tenses for each.
 # Enter this query at https://query.wikidata.org/.
 
-SELECT DISTINCT
+SELECT
   (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") AS ?lexemeID)
   ?pastFPS ?pastSPSF ?pastSPSM ?pastTPSF ?pastTPSM
   ?pastFPP ?pastSPPF ?pastSPPM ?pastTPPF ?pastTPPM
diff --git a/src/scribe_data/language_data_extraction/Hebrew/verbs/query_verbs_4.sparql b/src/scribe_data/language_data_extraction/Hebrew/verbs/query_verbs_4.sparql
index c17d4198a..02c3d9ad4 100644
--- a/src/scribe_data/language_data_extraction/Hebrew/verbs/query_verbs_4.sparql
+++ b/src/scribe_data/language_data_extraction/Hebrew/verbs/query_verbs_4.sparql
@@ -2,7 +2,7 @@
 # All Hebrew (Q9288) verbs and the currently implemented tenses for each.
 # Enter this query at https://query.wikidata.org/.
 
-SELECT DISTINCT
+SELECT
   (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") AS ?lexemeID)
   ?futFPS ?futSPSF ?futSPSM ?futTPSF ?futTPSM
   ?futFPP ?futSPPF ?futSPPM ?futTPPF ?futTPPM
diff --git a/src/scribe_data/language_data_extraction/Hindustani/Hindi/adjectives/query_adjectives.sparql b/src/scribe_data/language_data_extraction/Hindustani/Hindi/adjectives/query_adjectives.sparql
index 166de38d2..b1bba3c61 100644
--- a/src/scribe_data/language_data_extraction/Hindustani/Hindi/adjectives/query_adjectives.sparql
+++ b/src/scribe_data/language_data_extraction/Hindustani/Hindi/adjectives/query_adjectives.sparql
@@ -4,7 +4,7 @@
 
 # Note: We need to filter for "hi" to remove Urdu (ur) words.
 
-SELECT DISTINCT
+SELECT
   (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") AS ?lexemeID)
   ?adjective
   ?singulativeNumeral
diff --git a/src/scribe_data/language_data_extraction/Hindustani/Hindi/adverbs/query_adverbs.sparql b/src/scribe_data/language_data_extraction/Hindustani/Hindi/adverbs/query_adverbs.sparql
index 68fc55632..7e8ec4c66 100644
--- a/src/scribe_data/language_data_extraction/Hindustani/Hindi/adverbs/query_adverbs.sparql
+++ b/src/scribe_data/language_data_extraction/Hindustani/Hindi/adverbs/query_adverbs.sparql
@@ -4,7 +4,7 @@
 
 # Note: We need to filter for "hi" to remove Urdu (ur) words.
 
-SELECT DISTINCT
+SELECT
   (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") AS ?lexemeID)
   ?adverb
 
diff --git a/src/scribe_data/language_data_extraction/Hindustani/Hindi/nouns/query_nouns.sparql b/src/scribe_data/language_data_extraction/Hindustani/Hindi/nouns/query_nouns.sparql
index de8590d05..5d315392b 100644
--- a/src/scribe_data/language_data_extraction/Hindustani/Hindi/nouns/query_nouns.sparql
+++ b/src/scribe_data/language_data_extraction/Hindustani/Hindi/nouns/query_nouns.sparql
@@ -4,7 +4,7 @@
 
 # Note: We need to filter for "hi" to remove Urdu (ur) words.
 
-SELECT DISTINCT
+SELECT
   (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") AS ?lexemeID)
   ?singular
   ?plural
diff --git a/src/scribe_data/language_data_extraction/Hindustani/Hindi/postpositions/query_postpositions.sparql b/src/scribe_data/language_data_extraction/Hindustani/Hindi/postpositions/query_postpositions.sparql
index dde9fb0ac..e026332f1 100644
--- a/src/scribe_data/language_data_extraction/Hindustani/Hindi/postpositions/query_postpositions.sparql
+++ b/src/scribe_data/language_data_extraction/Hindustani/Hindi/postpositions/query_postpositions.sparql
@@ -4,7 +4,7 @@
 
 # Note: We need to filter for "hi" to remove Urdu (ur) words.
 
-SELECT DISTINCT
+SELECT
   ?lexeme
   (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") AS ?lexemeID)
   ?postposition
diff --git a/src/scribe_data/language_data_extraction/Hindustani/Hindi/prepositions/query_prepositions.sparql b/src/scribe_data/language_data_extraction/Hindustani/Hindi/prepositions/query_prepositions.sparql
index 3951f263c..d0e47bb32 100644
--- a/src/scribe_data/language_data_extraction/Hindustani/Hindi/prepositions/query_prepositions.sparql
+++ b/src/scribe_data/language_data_extraction/Hindustani/Hindi/prepositions/query_prepositions.sparql
@@ -4,7 +4,7 @@
 
 # Note: We need to filter for "hi" to remove Urdu (ur) words.
 
-SELECT DISTINCT
+SELECT
   ?lexeme
   (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") AS ?lexemeID)
   ?preposition
diff --git a/src/scribe_data/language_data_extraction/Hindustani/Hindi/verbs/query_verbs.sparql b/src/scribe_data/language_data_extraction/Hindustani/Hindi/verbs/query_verbs.sparql
index 984121e97..1a9b4f58c 100644
--- a/src/scribe_data/language_data_extraction/Hindustani/Hindi/verbs/query_verbs.sparql
+++ b/src/scribe_data/language_data_extraction/Hindustani/Hindi/verbs/query_verbs.sparql
@@ -4,7 +4,7 @@
 
 # Note: We need to filter for "hi" to remove Urdu (ur) words.
 
-SELECT DISTINCT
+SELECT
   (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") AS ?lexemeID)
   ?infinitive
   ?directCase
diff --git a/src/scribe_data/language_data_extraction/Hindustani/Urdu/adjectives/query_adjectives.sparql b/src/scribe_data/language_data_extraction/Hindustani/Urdu/adjectives/query_adjectives.sparql
index 01aa22aa2..a4f18e40f 100644
--- a/src/scribe_data/language_data_extraction/Hindustani/Urdu/adjectives/query_adjectives.sparql
+++ b/src/scribe_data/language_data_extraction/Hindustani/Urdu/adjectives/query_adjectives.sparql
@@ -4,7 +4,7 @@
 
 # Note: We need to filter for "ur" to remove Hindi (hi) words.
 
-SELECT DISTINCT
+SELECT
   (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") AS ?lexemeID)
   ?adjective
   ?singulativeNumeral
diff --git a/src/scribe_data/language_data_extraction/Hindustani/Urdu/adverbs/query_adverbs.sparql b/src/scribe_data/language_data_extraction/Hindustani/Urdu/adverbs/query_adverbs.sparql
index 09a8d7ca7..53c4bdfc9 100644
--- a/src/scribe_data/language_data_extraction/Hindustani/Urdu/adverbs/query_adverbs.sparql
+++ b/src/scribe_data/language_data_extraction/Hindustani/Urdu/adverbs/query_adverbs.sparql
@@ -4,7 +4,7 @@
 
 # Note: We need to filter for "ur" to remove Hindi (hi) words.
 
-SELECT DISTINCT
+SELECT
   (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") AS ?lexemeID)
   ?adverb
 
diff --git a/src/scribe_data/language_data_extraction/Hindustani/Urdu/nouns/query_nouns.sparql b/src/scribe_data/language_data_extraction/Hindustani/Urdu/nouns/query_nouns.sparql
index e3b70b995..66efb97c6 100644
--- a/src/scribe_data/language_data_extraction/Hindustani/Urdu/nouns/query_nouns.sparql
+++ b/src/scribe_data/language_data_extraction/Hindustani/Urdu/nouns/query_nouns.sparql
@@ -4,7 +4,7 @@
 
 # Note: We need to filter for "ur" to remove Hindi (hi) words.
 
-SELECT DISTINCT
+SELECT
   (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") AS ?lexemeID)
   ?singular
   ?plural
diff --git a/src/scribe_data/language_data_extraction/Hindustani/Urdu/postpositions/query_postpositions.sparql b/src/scribe_data/language_data_extraction/Hindustani/Urdu/postpositions/query_postpositions.sparql
index d64490145..10c9a36f7 100644
--- a/src/scribe_data/language_data_extraction/Hindustani/Urdu/postpositions/query_postpositions.sparql
+++ b/src/scribe_data/language_data_extraction/Hindustani/Urdu/postpositions/query_postpositions.sparql
@@ -4,7 +4,7 @@
 
 # Note: We need to filter for "ur" to remove Hindi (hi) words.
 
-SELECT DISTINCT
+SELECT
   ?lexeme
   (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") AS ?lexemeID)
   ?postposition
diff --git a/src/scribe_data/language_data_extraction/Hindustani/Urdu/prepositions/query_prepositions.sparql b/src/scribe_data/language_data_extraction/Hindustani/Urdu/prepositions/query_prepositions.sparql
index 6ed3f531c..6feddaa71 100644
--- a/src/scribe_data/language_data_extraction/Hindustani/Urdu/prepositions/query_prepositions.sparql
+++ b/src/scribe_data/language_data_extraction/Hindustani/Urdu/prepositions/query_prepositions.sparql
@@ -4,7 +4,7 @@
 
 # Note: We need to filter for "ur" to remove Hindi (hi) words.
 
-SELECT DISTINCT
+SELECT
   ?lexeme
   (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") AS ?lexemeID)
   ?preposition
diff --git a/src/scribe_data/language_data_extraction/Hindustani/Urdu/verbs/query_verbs.sparql b/src/scribe_data/language_data_extraction/Hindustani/Urdu/verbs/query_verbs.sparql
index bf1d8b1fd..e6df3771c 100644
--- a/src/scribe_data/language_data_extraction/Hindustani/Urdu/verbs/query_verbs.sparql
+++ b/src/scribe_data/language_data_extraction/Hindustani/Urdu/verbs/query_verbs.sparql
@@ -4,7 +4,7 @@
 
 # Note: We need to filter for "ur" to remove Hindustani (hi) words.
 
-SELECT DISTINCT
+SELECT
   (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") AS ?lexemeID)
   ?infinitive
   ?directCase
diff --git a/src/scribe_data/language_data_extraction/Indonesian/nouns/query_nouns.sparql b/src/scribe_data/language_data_extraction/Indonesian/nouns/query_nouns.sparql
index 084a67768..b26a0059d 100644
--- a/src/scribe_data/language_data_extraction/Indonesian/nouns/query_nouns.sparql
+++ b/src/scribe_data/language_data_extraction/Indonesian/nouns/query_nouns.sparql
@@ -2,7 +2,7 @@
 # All Indonesian (Q9240) nouns.
 # Enter this query at https://query.wikidata.org/.
 
-SELECT DISTINCT
+SELECT
   (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") AS ?lexemeID)
   ?noun
 
diff --git a/src/scribe_data/language_data_extraction/Italian/nouns/query_nouns.sparql b/src/scribe_data/language_data_extraction/Italian/nouns/query_nouns.sparql
index 2f85a9453..fbbd08aaa 100644
--- a/src/scribe_data/language_data_extraction/Italian/nouns/query_nouns.sparql
+++ b/src/scribe_data/language_data_extraction/Italian/nouns/query_nouns.sparql
@@ -2,7 +2,7 @@
 # All Italian (Q652) nouns, their plurals and their genders.
 # Enter this query at https://query.wikidata.org/.
 
-SELECT DISTINCT
+SELECT
   (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") AS ?lexemeID)
   ?singular
   ?plural
diff --git a/src/scribe_data/language_data_extraction/Japanese/nouns/query_nouns.sparql b/src/scribe_data/language_data_extraction/Japanese/nouns/query_nouns.sparql
index 0dde5908a..098661648 100644
--- a/src/scribe_data/language_data_extraction/Japanese/nouns/query_nouns.sparql
+++ b/src/scribe_data/language_data_extraction/Japanese/nouns/query_nouns.sparql
@@ -2,7 +2,7 @@
 # All Japanese (Q5287) nouns.
 # Enter this query at https://query.wikidata.org/.
 
-SELECT DISTINCT
+SELECT
   (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") AS ?lexemeID)
   ?noun
 
diff --git a/src/scribe_data/language_data_extraction/Korean/adverbs/query_adverbs.sparql b/src/scribe_data/language_data_extraction/Korean/adverbs/query_adverbs.sparql
index 020073b13..a76b657f0 100644
--- a/src/scribe_data/language_data_extraction/Korean/adverbs/query_adverbs.sparql
+++ b/src/scribe_data/language_data_extraction/Korean/adverbs/query_adverbs.sparql
@@ -2,7 +2,7 @@
 # All Korean (Q9176) adverbs.
 # Enter this query at https://query.wikidata.org/.
 
-SELECT DISTINCT
+SELECT
   (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") AS ?lexemeID)
   ?adverb
 
diff --git a/src/scribe_data/language_data_extraction/Korean/postposition/query_postpositions.sparql b/src/scribe_data/language_data_extraction/Korean/postposition/query_postpositions.sparql
index a1a8cb473..5a6cb2d44 100644
--- a/src/scribe_data/language_data_extraction/Korean/postposition/query_postpositions.sparql
+++ b/src/scribe_data/language_data_extraction/Korean/postposition/query_postpositions.sparql
@@ -2,7 +2,7 @@
 # All Korean (Q9176) postpositions.
 # Enter this query at https://query.wikidata.org/.
 
-SELECT DISTINCT
+SELECT
   (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") AS ?lexemeID)
   ?postposition
 
diff --git a/src/scribe_data/language_data_extraction/Kurmanji/nouns/query_nouns.sparql b/src/scribe_data/language_data_extraction/Kurmanji/nouns/query_nouns.sparql
index 822d09e61..a6839c2e0 100644
--- a/src/scribe_data/language_data_extraction/Kurmanji/nouns/query_nouns.sparql
+++ b/src/scribe_data/language_data_extraction/Kurmanji/nouns/query_nouns.sparql
@@ -2,7 +2,7 @@
 # All Kurmanji (Q36163) nouns and their gender.
 # Enter this query at https://query.wikidata.org/.
 
-SELECT DISTINCT
+SELECT
   (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") AS ?lexemeID)
   ?directDefSingular
   ?gender
diff --git a/src/scribe_data/language_data_extraction/Malay/nouns/query_nouns.sparql b/src/scribe_data/language_data_extraction/Malay/nouns/query_nouns.sparql
index 4002b553d..1da57f106 100644
--- a/src/scribe_data/language_data_extraction/Malay/nouns/query_nouns.sparql
+++ b/src/scribe_data/language_data_extraction/Malay/nouns/query_nouns.sparql
@@ -2,7 +2,7 @@
 # All Malay (Q9237) nouns.
 # Enter this query at https://query.wikidata.org/.
 
-SELECT DISTINCT
+SELECT
   (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") AS ?lexemeID)
   ?noun
 
diff --git a/src/scribe_data/language_data_extraction/Malayalam/adjectives/query_adjectives.sparql b/src/scribe_data/language_data_extraction/Malayalam/adjectives/query_adjectives.sparql
index 6e666edae..2bff79f3c 100644
--- a/src/scribe_data/language_data_extraction/Malayalam/adjectives/query_adjectives.sparql
+++ b/src/scribe_data/language_data_extraction/Malayalam/adjectives/query_adjectives.sparql
@@ -2,7 +2,7 @@
 # All Malayalam (Q36236) adjectives.
 # Enter this query at https://query.wikidata.org/.
 
-SELECT DISTINCT
+SELECT
   (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") AS ?lexemeID)
   ?adjective
 
diff --git a/src/scribe_data/language_data_extraction/Malayalam/adverbs/query_adverbs.sparql b/src/scribe_data/language_data_extraction/Malayalam/adverbs/query_adverbs.sparql
index a0b17ddd7..e1a0af8d5 100644
--- a/src/scribe_data/language_data_extraction/Malayalam/adverbs/query_adverbs.sparql
+++ b/src/scribe_data/language_data_extraction/Malayalam/adverbs/query_adverbs.sparql
@@ -2,7 +2,7 @@
 # All Malayalam (Q36236) adverbs.
 # Enter this query at https://query.wikidata.org/.
 
-SELECT DISTINCT
+SELECT
   (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") AS ?lexemeID)
   ?adverb
 
diff --git a/src/scribe_data/language_data_extraction/Malayalam/nouns/query_nouns.sparql b/src/scribe_data/language_data_extraction/Malayalam/nouns/query_nouns.sparql
index be39d953e..d1402399b 100644
--- a/src/scribe_data/language_data_extraction/Malayalam/nouns/query_nouns.sparql
+++ b/src/scribe_data/language_data_extraction/Malayalam/nouns/query_nouns.sparql
@@ -2,7 +2,7 @@
 # All Malayalam (Q36236) nouns and their plurals in the given cases.
 # Enter this query at https://query.wikidata.org/.
 
-SELECT DISTINCT
+SELECT
   (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") AS ?lexemeID)
   ?nomSingular
   ?gender
diff --git a/src/scribe_data/language_data_extraction/Malayalam/prepositions/query_prepositions.sparql b/src/scribe_data/language_data_extraction/Malayalam/prepositions/query_prepositions.sparql
index 0e2487fce..59b1c4075 100644
--- a/src/scribe_data/language_data_extraction/Malayalam/prepositions/query_prepositions.sparql
+++ b/src/scribe_data/language_data_extraction/Malayalam/prepositions/query_prepositions.sparql
@@ -2,7 +2,7 @@
 # All Malayalam (Q36236) prepositions.
 # Enter this query at https://query.wikidata.org/.
 
-SELECT DISTINCT
+SELECT
   (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") AS ?lexemeID)
   ?preposition
 
diff --git a/src/scribe_data/language_data_extraction/Malayalam/verbs/query_verbs.sparql b/src/scribe_data/language_data_extraction/Malayalam/verbs/query_verbs.sparql
index 351b7af31..0db34c67c 100644
--- a/src/scribe_data/language_data_extraction/Malayalam/verbs/query_verbs.sparql
+++ b/src/scribe_data/language_data_extraction/Malayalam/verbs/query_verbs.sparql
@@ -2,7 +2,7 @@
 # All Malayalam (Q36236) verbs and the currently implemented tenses for each.
 # Enter this query at https://query.wikidata.org/.
 
-SELECT DISTINCT
+SELECT
   (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") AS ?lexemeID)
   ?verb
   ?presentInfinitive
diff --git "a/src/scribe_data/language_data_extraction/Norwegian/Bokm\303\245l/nouns/query_nouns.sparql" "b/src/scribe_data/language_data_extraction/Norwegian/Bokm\303\245l/nouns/query_nouns.sparql"
index 1e8779c90..4f505b678 100644
--- "a/src/scribe_data/language_data_extraction/Norwegian/Bokm\303\245l/nouns/query_nouns.sparql"
+++ "b/src/scribe_data/language_data_extraction/Norwegian/Bokm\303\245l/nouns/query_nouns.sparql"
@@ -3,7 +3,7 @@
 # Enter this query at https://query.wikidata.org/.
 # Note: This query is for Bokmål (Q25167) rather than Nynorsk (Q25164).
 
-SELECT DISTINCT
+SELECT
   (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") AS ?lexemeID)
   ?indefSingular
   ?defSingular
diff --git "a/src/scribe_data/language_data_extraction/Norwegian/Bokm\303\245l/verbs/query_verbs.sparql" "b/src/scribe_data/language_data_extraction/Norwegian/Bokm\303\245l/verbs/query_verbs.sparql"
index 1292041c3..e4cd7bef0 100644
--- "a/src/scribe_data/language_data_extraction/Norwegian/Bokm\303\245l/verbs/query_verbs.sparql"
+++ "b/src/scribe_data/language_data_extraction/Norwegian/Bokm\303\245l/verbs/query_verbs.sparql"
@@ -3,7 +3,7 @@
 # Enter this query at https://query.wikidata.org/.
 # Note: This query is for Bokmål (Q25167) rather than Nynorsk (Q25164).
 
-SELECT DISTINCT
+SELECT
   (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") AS ?lexemeID)
   ?infinitive
   ?present
diff --git a/src/scribe_data/language_data_extraction/Norwegian/Nynorsk/nouns/query_nouns.sparql b/src/scribe_data/language_data_extraction/Norwegian/Nynorsk/nouns/query_nouns.sparql
index d2cb20182..60384065f 100644
--- a/src/scribe_data/language_data_extraction/Norwegian/Nynorsk/nouns/query_nouns.sparql
+++ b/src/scribe_data/language_data_extraction/Norwegian/Nynorsk/nouns/query_nouns.sparql
@@ -3,7 +3,7 @@
 # Enter this query at https://query.wikidata.org/.
 # Note: This query is for Nynorsk (Q25164) rather than Bokmål (Q25167).
 
-SELECT DISTINCT
+SELECT
   (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") AS ?lexemeID)
   ?indefSingular
   ?defSingular
diff --git a/src/scribe_data/language_data_extraction/Norwegian/Nynorsk/verbs/query_verbs.sparql b/src/scribe_data/language_data_extraction/Norwegian/Nynorsk/verbs/query_verbs.sparql
index c18c6d3c9..2cbc7d65f 100644
--- a/src/scribe_data/language_data_extraction/Norwegian/Nynorsk/verbs/query_verbs.sparql
+++ b/src/scribe_data/language_data_extraction/Norwegian/Nynorsk/verbs/query_verbs.sparql
@@ -3,7 +3,7 @@
 # Enter this query at https://query.wikidata.org/.
 # Note: This query is for Nynorsk (Q25164) rather than Bokmål (Q25167).
 
-SELECT DISTINCT
+SELECT
   (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") AS ?lexemeID)
   ?infinitive
   ?aInfinitiveActive
diff --git a/src/scribe_data/language_data_extraction/Pidgin/Nigerian/adverbs/query_adverbs.sparql b/src/scribe_data/language_data_extraction/Pidgin/Nigerian/adverbs/query_adverbs.sparql
index d65394d09..be6e4810a 100644
--- a/src/scribe_data/language_data_extraction/Pidgin/Nigerian/adverbs/query_adverbs.sparql
+++ b/src/scribe_data/language_data_extraction/Pidgin/Nigerian/adverbs/query_adverbs.sparql
@@ -2,7 +2,7 @@
 # All Nigerian Pidgin (Q33655) adverbs.
 # Enter this query at https://query.wikidata.org/.
 
-SELECT DISTINCT
+SELECT
   (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") AS ?lexemeID)
   ?adverb
 
diff --git a/src/scribe_data/language_data_extraction/Pidgin/Nigerian/nouns/query_nouns.sparql b/src/scribe_data/language_data_extraction/Pidgin/Nigerian/nouns/query_nouns.sparql
index 3af46c7af..a22b1e059 100644
--- a/src/scribe_data/language_data_extraction/Pidgin/Nigerian/nouns/query_nouns.sparql
+++ b/src/scribe_data/language_data_extraction/Pidgin/Nigerian/nouns/query_nouns.sparql
@@ -2,7 +2,7 @@
 # All Nigerian Pidgin (Q33655) nouns, their plurals and their genders.
 # Enter this query at https://query.wikidata.org/.
 
-SELECT DISTINCT
+SELECT
   (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") AS ?lexemeID)
   ?singular
   ?plural
diff --git a/src/scribe_data/language_data_extraction/Polish/nouns/query_nouns.sparql b/src/scribe_data/language_data_extraction/Polish/nouns/query_nouns.sparql
index 420f8e1b2..5bd7e4fd4 100644
--- a/src/scribe_data/language_data_extraction/Polish/nouns/query_nouns.sparql
+++ b/src/scribe_data/language_data_extraction/Polish/nouns/query_nouns.sparql
@@ -2,7 +2,7 @@
 # All Polish (Q809) nouns, their plurals and their genders in the given cases.
 # Enter this query at https://query.wikidata.org/.
 
-SELECT DISTINCT
+SELECT
   (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") AS ?lexemeID)
   ?nomSingular
   ?nomPlural
diff --git a/src/scribe_data/language_data_extraction/Portuguese/nouns/query_nouns.sparql b/src/scribe_data/language_data_extraction/Portuguese/nouns/query_nouns.sparql
index e4d95e96a..705ae0cb0 100644
--- a/src/scribe_data/language_data_extraction/Portuguese/nouns/query_nouns.sparql
+++ b/src/scribe_data/language_data_extraction/Portuguese/nouns/query_nouns.sparql
@@ -2,7 +2,7 @@
 # All Portuguese (Q5146) nouns, their plurals and their genders.
 # Enter this query at https://query.wikidata.org/.
 
-SELECT DISTINCT
+SELECT
   (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") AS ?lexemeID)
   ?singular
   ?plural
diff --git a/src/scribe_data/language_data_extraction/Portuguese/verbs/query_verbs.sparql b/src/scribe_data/language_data_extraction/Portuguese/verbs/query_verbs.sparql
index 7ce7c48d9..79abc949d 100644
--- a/src/scribe_data/language_data_extraction/Portuguese/verbs/query_verbs.sparql
+++ b/src/scribe_data/language_data_extraction/Portuguese/verbs/query_verbs.sparql
@@ -2,7 +2,7 @@
 # All Portuguese (Q5146) verbs and the currently implemented tenses for each.
 # Enter this query at https://query.wikidata.org/.
 
-SELECT DISTINCT
+SELECT
   (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") AS ?lexemeID)
   ?infinitive
   ?presFPS ?presSPS ?presTPS
diff --git a/src/scribe_data/language_data_extraction/Punjabi/Gurmukhi/nouns/query_nouns.sparql b/src/scribe_data/language_data_extraction/Punjabi/Gurmukhi/nouns/query_nouns.sparql
index 011a9df9d..3fa164731 100644
--- a/src/scribe_data/language_data_extraction/Punjabi/Gurmukhi/nouns/query_nouns.sparql
+++ b/src/scribe_data/language_data_extraction/Punjabi/Gurmukhi/nouns/query_nouns.sparql
@@ -4,7 +4,7 @@
 
 # Note: We need to filter for "pa" to select Gurmukhi words.
 
-SELECT DISTINCT
+SELECT
   (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") AS ?lexemeID)
   ?singular
   ?plural
diff --git a/src/scribe_data/language_data_extraction/Punjabi/Gurmukhi/verbs/query_verbs.sparql b/src/scribe_data/language_data_extraction/Punjabi/Gurmukhi/verbs/query_verbs.sparql
index 72558a266..99999e0b0 100644
--- a/src/scribe_data/language_data_extraction/Punjabi/Gurmukhi/verbs/query_verbs.sparql
+++ b/src/scribe_data/language_data_extraction/Punjabi/Gurmukhi/verbs/query_verbs.sparql
@@ -4,7 +4,7 @@
 
 # Note: We need to filter for "pa" to select Gurmukhi words.
 
-SELECT DISTINCT
+SELECT
   (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") AS ?lexemeID)
   ?verb
 
diff --git a/src/scribe_data/language_data_extraction/Punjabi/Shahmukhi/nouns/query_nouns.sparql b/src/scribe_data/language_data_extraction/Punjabi/Shahmukhi/nouns/query_nouns.sparql
index 2ba573bfe..a930fb16f 100644
--- a/src/scribe_data/language_data_extraction/Punjabi/Shahmukhi/nouns/query_nouns.sparql
+++ b/src/scribe_data/language_data_extraction/Punjabi/Shahmukhi/nouns/query_nouns.sparql
@@ -4,7 +4,7 @@
 
 # Note: We need to filter for "pnb" to select Shahmukhi words.
 
-SELECT DISTINCT
+SELECT
   ?lexeme
   (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") AS ?lexemeID)
   ?singular
diff --git a/src/scribe_data/language_data_extraction/Punjabi/Shahmukhi/verbs/query_verbs.sparql b/src/scribe_data/language_data_extraction/Punjabi/Shahmukhi/verbs/query_verbs.sparql
index 221016f93..4d7de132f 100644
--- a/src/scribe_data/language_data_extraction/Punjabi/Shahmukhi/verbs/query_verbs.sparql
+++ b/src/scribe_data/language_data_extraction/Punjabi/Shahmukhi/verbs/query_verbs.sparql
@@ -4,7 +4,7 @@
 
 # Note: We need to filter for "pnb" to select Shahmukhi words.
 
-SELECT DISTINCT
+SELECT
   (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") AS ?lexemeID)
   ?verb
 
diff --git a/src/scribe_data/language_data_extraction/Russian/nouns/query_nouns.sparql b/src/scribe_data/language_data_extraction/Russian/nouns/query_nouns.sparql
index da6685228..25abb07a9 100644
--- a/src/scribe_data/language_data_extraction/Russian/nouns/query_nouns.sparql
+++ b/src/scribe_data/language_data_extraction/Russian/nouns/query_nouns.sparql
@@ -2,7 +2,7 @@
 # All Russian (Q7737) nouns, their plurals and their genders in the given cases.
 # Enter this query at https://query.wikidata.org/.
 
-SELECT DISTINCT
+SELECT
   (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") AS ?lexemeID)
   ?nomSingular
   ?nomPlural
diff --git a/src/scribe_data/language_data_extraction/Russian/verbs/query_verbs.sparql b/src/scribe_data/language_data_extraction/Russian/verbs/query_verbs.sparql
index 7b7aaf4fa..501d23e1c 100644
--- a/src/scribe_data/language_data_extraction/Russian/verbs/query_verbs.sparql
+++ b/src/scribe_data/language_data_extraction/Russian/verbs/query_verbs.sparql
@@ -2,7 +2,7 @@
 # All Russian (Q7737) verbs and the currently implemented tenses for each.
 # Enter this query at https://query.wikidata.org/.
 
-SELECT DISTINCT
+SELECT
   (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") AS ?lexemeID)
   ?infinitive
   ?presFPS ?presSPS ?presTPS
diff --git a/src/scribe_data/language_data_extraction/Slovak/adverbs/query_adverbs.sparql b/src/scribe_data/language_data_extraction/Slovak/adverbs/query_adverbs.sparql
index d80e628fc..e123b7cc9 100644
--- a/src/scribe_data/language_data_extraction/Slovak/adverbs/query_adverbs.sparql
+++ b/src/scribe_data/language_data_extraction/Slovak/adverbs/query_adverbs.sparql
@@ -2,7 +2,7 @@
 # All Slovak (Q9058) adverbs.
 # Enter this query at https://query.wikidata.org/.
 
-SELECT DISTINCT
+SELECT
   (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") AS ?lexemeID)
   ?adverb
 
diff --git a/src/scribe_data/language_data_extraction/Slovak/nouns/query_nouns.sparql b/src/scribe_data/language_data_extraction/Slovak/nouns/query_nouns.sparql
index b10482aac..9bafa552e 100644
--- a/src/scribe_data/language_data_extraction/Slovak/nouns/query_nouns.sparql
+++ b/src/scribe_data/language_data_extraction/Slovak/nouns/query_nouns.sparql
@@ -2,7 +2,7 @@
 # All Slovak (Q9058) nouns, their plurals and their genders for the given cases.
 # Enter this query at https://query.wikidata.org/.
 
-SELECT DISTINCT
+SELECT
   (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") AS ?lexemeID)
   ?nomSingular
   ?nomPlural
diff --git a/src/scribe_data/language_data_extraction/Spanish/nouns/query_nouns.sparql b/src/scribe_data/language_data_extraction/Spanish/nouns/query_nouns.sparql
index a88e8faae..dd0b54d87 100644
--- a/src/scribe_data/language_data_extraction/Spanish/nouns/query_nouns.sparql
+++ b/src/scribe_data/language_data_extraction/Spanish/nouns/query_nouns.sparql
@@ -3,7 +3,7 @@
 # Enter this query at https://query.wikidata.org/.
 # Note: Spansih sometimes has masculine and feminine versions on a single lexeme.
 
-SELECT DISTINCT
+SELECT
   (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") AS ?lexemeID)
   ?singular
   ?plural
diff --git a/src/scribe_data/language_data_extraction/Swahili/nouns/query_nouns.sparql b/src/scribe_data/language_data_extraction/Swahili/nouns/query_nouns.sparql
index 547430d04..fb7055fb0 100644
--- a/src/scribe_data/language_data_extraction/Swahili/nouns/query_nouns.sparql
+++ b/src/scribe_data/language_data_extraction/Swahili/nouns/query_nouns.sparql
@@ -2,7 +2,7 @@
 # All Swahili (Q7838) nouns and their plurals.
 # Enter this query at https://query.wikidata.org/.
 
-SELECT DISTINCT
+SELECT
   (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") AS ?lexemeID)
   ?singular
   ?plural
diff --git a/src/scribe_data/language_data_extraction/Swedish/nouns/query_nouns.sparql b/src/scribe_data/language_data_extraction/Swedish/nouns/query_nouns.sparql
index 720fb5b23..243733b0b 100644
--- a/src/scribe_data/language_data_extraction/Swedish/nouns/query_nouns.sparql
+++ b/src/scribe_data/language_data_extraction/Swedish/nouns/query_nouns.sparql
@@ -2,7 +2,7 @@
 # All Swedish (Q9027) nouns with their plural, gender and genitive forms.
 # Enter this query at https://query.wikidata.org/.
 
-SELECT DISTINCT
+SELECT
   (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") AS ?lexemeID)
   ?nomIndefSingular
   ?nomIndefPlural
diff --git a/src/scribe_data/language_data_extraction/Tajik/nouns/query_nouns.sparql b/src/scribe_data/language_data_extraction/Tajik/nouns/query_nouns.sparql
index 11ffb5718..44b5f0aae 100644
--- a/src/scribe_data/language_data_extraction/Tajik/nouns/query_nouns.sparql
+++ b/src/scribe_data/language_data_extraction/Tajik/nouns/query_nouns.sparql
@@ -2,7 +2,7 @@
 # All Tajik (Q9260) nouns.
 # Enter this query at https://query.wikidata.org/.
 
-SELECT DISTINCT
+SELECT
   (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") AS ?lexemeID)
   ?noun
 
diff --git a/src/scribe_data/language_data_extraction/Tamil/nouns/query_nouns.sparql b/src/scribe_data/language_data_extraction/Tamil/nouns/query_nouns.sparql
index 16ceb45f0..ae10914e1 100644
--- a/src/scribe_data/language_data_extraction/Tamil/nouns/query_nouns.sparql
+++ b/src/scribe_data/language_data_extraction/Tamil/nouns/query_nouns.sparql
@@ -2,7 +2,7 @@
 # All Tamil (Q5885) nouns and their plurals for the given cases.
 # Enter this query at https://query.wikidata.org/.
 
-SELECT DISTINCT
+SELECT
   (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") AS ?lexemeID)
   ?nomSingular
   ?nomPlural
diff --git a/src/scribe_data/language_data_extraction/Ukrainian/nouns/query_nouns.sparql b/src/scribe_data/language_data_extraction/Ukrainian/nouns/query_nouns.sparql
index 30450c04f..1548b4c46 100644
--- a/src/scribe_data/language_data_extraction/Ukrainian/nouns/query_nouns.sparql
+++ b/src/scribe_data/language_data_extraction/Ukrainian/nouns/query_nouns.sparql
@@ -2,7 +2,7 @@
 # All Ukrainian (Q8798) nouns, their plurals and their genders for the given cases.
 # Enter this query at https://query.wikidata.org/.
 
-SELECT DISTINCT
+SELECT
   (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") AS ?lexemeID)
   ?nomSingular
   ?nomPlural
diff --git a/src/scribe_data/language_data_extraction/Yoruba/adverbs/query_adverbs.sparql b/src/scribe_data/language_data_extraction/Yoruba/adverbs/query_adverbs.sparql
index 38387bde2..ad6db4eff 100644
--- a/src/scribe_data/language_data_extraction/Yoruba/adverbs/query_adverbs.sparql
+++ b/src/scribe_data/language_data_extraction/Yoruba/adverbs/query_adverbs.sparql
@@ -2,7 +2,7 @@
 # All Yoruba (Q34311) adverbs.
 # Enter this query at https://query.wikidata.org/.
 
-SELECT DISTINCT
+SELECT
   (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") AS ?lexemeID)
   ?adverb
 
diff --git a/src/scribe_data/language_data_extraction/Yoruba/nouns/query_nouns.sparql b/src/scribe_data/language_data_extraction/Yoruba/nouns/query_nouns.sparql
index 47c83c80d..44a8f48fa 100644
--- a/src/scribe_data/language_data_extraction/Yoruba/nouns/query_nouns.sparql
+++ b/src/scribe_data/language_data_extraction/Yoruba/nouns/query_nouns.sparql
@@ -2,7 +2,7 @@
 # All Yoruba (Q34311) nouns.
 # Enter this query at https://query.wikidata.org/.
 
-SELECT DISTINCT
+SELECT
   (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") AS ?lexemeID)
   ?noun
 

From b5fecce762438a8c97c97a6e5eb4e526d01ecb2f Mon Sep 17 00:00:00 2001
From: Angel osim <69635048+Otom-obhazi@users.noreply.github.com>
Date: Tue, 15 Oct 2024 13:07:15 +0100
Subject: [PATCH 04/36] Create query_adverbs.sparql

adverb for chinese/mandarin
---
 .../Chinese/Mandarin/Adverbs/query_adverbs.sparql   | 13 +++++++++++++
 1 file changed, 13 insertions(+)
 create mode 100644 src/scribe_data/language_data_extraction/Chinese/Mandarin/Adverbs/query_adverbs.sparql

diff --git a/src/scribe_data/language_data_extraction/Chinese/Mandarin/Adverbs/query_adverbs.sparql b/src/scribe_data/language_data_extraction/Chinese/Mandarin/Adverbs/query_adverbs.sparql
new file mode 100644
index 000000000..46251a815
--- /dev/null
+++ b/src/scribe_data/language_data_extraction/Chinese/Mandarin/Adverbs/query_adverbs.sparql
@@ -0,0 +1,13 @@
+# tool: scribe-data
+# All Standard Mandarin Chinese (Q727694) adverbs.
+# Enter this query at https://query.wikidata.org/.
+
+SELECT DISTINCT
+  (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") AS ?lexemeID)
+  ?adverb
+
+WHERE {
+  ?lexeme dct:language wd:Q727694 ;
+    wikibase:lexicalCategory wd:Q380057 ;
+    wikibase:lemma ?adverb .
+}

From ae15e7772597dade1259d200ea441b43075256de Mon Sep 17 00:00:00 2001
From: Andrew Tavis McAllister <andrew.t.mcallister@gmail.com>
Date: Tue, 15 Oct 2024 18:21:12 +0200
Subject: [PATCH 05/36] Add filter for language

---
 .../Chinese/Mandarin/Adverbs/query_adverbs.sparql              | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/scribe_data/language_data_extraction/Chinese/Mandarin/Adverbs/query_adverbs.sparql b/src/scribe_data/language_data_extraction/Chinese/Mandarin/Adverbs/query_adverbs.sparql
index 46251a815..8633280f4 100644
--- a/src/scribe_data/language_data_extraction/Chinese/Mandarin/Adverbs/query_adverbs.sparql
+++ b/src/scribe_data/language_data_extraction/Chinese/Mandarin/Adverbs/query_adverbs.sparql
@@ -2,7 +2,7 @@
 # All Standard Mandarin Chinese (Q727694) adverbs.
 # Enter this query at https://query.wikidata.org/.
 
-SELECT DISTINCT
+SELECT
   (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") AS ?lexemeID)
   ?adverb
 
@@ -10,4 +10,5 @@ WHERE {
   ?lexeme dct:language wd:Q727694 ;
     wikibase:lexicalCategory wd:Q380057 ;
     wikibase:lemma ?adverb .
+    FILTER(LANG(?adverb) = "zh") .
 }

From f5f74049df6d915d4eae84f8ae984b388a191b99 Mon Sep 17 00:00:00 2001
From: Angel osim <69635048+Otom-obhazi@users.noreply.github.com>
Date: Tue, 15 Oct 2024 14:08:24 +0100
Subject: [PATCH 06/36] Create query_adverbs.sparql

adverb for english
---
 .../English/Adverbs/query_adverbs.sparql            | 13 +++++++++++++
 1 file changed, 13 insertions(+)
 create mode 100644 src/scribe_data/language_data_extraction/English/Adverbs/query_adverbs.sparql

diff --git a/src/scribe_data/language_data_extraction/English/Adverbs/query_adverbs.sparql b/src/scribe_data/language_data_extraction/English/Adverbs/query_adverbs.sparql
new file mode 100644
index 000000000..cf29f5aef
--- /dev/null
+++ b/src/scribe_data/language_data_extraction/English/Adverbs/query_adverbs.sparql
@@ -0,0 +1,13 @@
+# tool: scribe-data
+# All English (Q1860) adverbs.
+# Enter this query at https://query.wikidata.org/.
+
+SELECT DISTINCT
+  (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") AS ?lexemeID)
+  ?adverb
+
+WHERE {
+  ?lexeme dct:language wd:Q1860 ;
+    wikibase:lexicalCategory wd:Q380057 ;
+    wikibase:lemma ?adverb .
+}

From e250233d33cd8e4f5b362e0ee162c35e0a08aaa6 Mon Sep 17 00:00:00 2001
From: Andrew Tavis McAllister <andrew.t.mcallister@gmail.com>
Date: Tue, 15 Oct 2024 18:32:34 +0200
Subject: [PATCH 07/36] Remove adverb file and prepare tests

---
 .../English/Adverbs/query_adverbs.sparql            | 13 -------------
 tests/cli/test_list.py                              |  1 +
 2 files changed, 1 insertion(+), 13 deletions(-)
 delete mode 100644 src/scribe_data/language_data_extraction/English/Adverbs/query_adverbs.sparql

diff --git a/src/scribe_data/language_data_extraction/English/Adverbs/query_adverbs.sparql b/src/scribe_data/language_data_extraction/English/Adverbs/query_adverbs.sparql
deleted file mode 100644
index cf29f5aef..000000000
--- a/src/scribe_data/language_data_extraction/English/Adverbs/query_adverbs.sparql
+++ /dev/null
@@ -1,13 +0,0 @@
-# tool: scribe-data
-# All English (Q1860) adverbs.
-# Enter this query at https://query.wikidata.org/.
-
-SELECT DISTINCT
-  (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") AS ?lexemeID)
-  ?adverb
-
-WHERE {
-  ?lexeme dct:language wd:Q1860 ;
-    wikibase:lexicalCategory wd:Q380057 ;
-    wikibase:lemma ?adverb .
-}
diff --git a/tests/cli/test_list.py b/tests/cli/test_list.py
index 1ec2ec1e4..def230511 100644
--- a/tests/cli/test_list.py
+++ b/tests/cli/test_list.py
@@ -80,6 +80,7 @@ def test_list_data_types_specific_language(self, mock_print):
             call("Available data types: English"),
             call("-----------------------------"),
             call("adjectives"),
+            call("adverbs"),
             call("emoji-keywords"),
             call("nouns"),
             call("verbs"),

From 52dca1911b453bcf7e9c8d531e03b65fba77cea1 Mon Sep 17 00:00:00 2001
From: Andrew Tavis McAllister <andrew.t.mcallister@gmail.com>
Date: Tue, 15 Oct 2024 18:33:04 +0200
Subject: [PATCH 08/36] Re-add English adverbs

---
 .../English/adverbs/query_adverbs.sparql            | 13 +++++++++++++
 1 file changed, 13 insertions(+)
 create mode 100644 src/scribe_data/language_data_extraction/English/adverbs/query_adverbs.sparql

diff --git a/src/scribe_data/language_data_extraction/English/adverbs/query_adverbs.sparql b/src/scribe_data/language_data_extraction/English/adverbs/query_adverbs.sparql
new file mode 100644
index 000000000..cf29f5aef
--- /dev/null
+++ b/src/scribe_data/language_data_extraction/English/adverbs/query_adverbs.sparql
@@ -0,0 +1,13 @@
+# tool: scribe-data
+# All English (Q1860) adverbs.
+# Enter this query at https://query.wikidata.org/.
+
+SELECT DISTINCT
+  (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") AS ?lexemeID)
+  ?adverb
+
+WHERE {
+  ?lexeme dct:language wd:Q1860 ;
+    wikibase:lexicalCategory wd:Q380057 ;
+    wikibase:lemma ?adverb .
+}

From 7dbf7b018e088571206a0f5eec39190cfdca7cbc Mon Sep 17 00:00:00 2001
From: Veronica Waiganjo <veronicahwags@gmail.com>
Date: Tue, 15 Oct 2024 15:06:47 +0300
Subject: [PATCH 09/36] Add Chinese Mandarin adverbs, prepositions, adjectives and
 emoji keywords

---
 .../Prepositions/query_prepositions.sparql    | 13 ++++++
 .../adjectives/query_adjectives.sparql        | 13 ++++++
 .../Mandarin/adverbs/query_adverbs.sparql     | 13 ++++++
 .../Mandarin/emoji_keywords/__init__.py       |  0
 .../emoji_keywords/generate_emoji_keywords.py | 46 +++++++++++++++++++
 5 files changed, 85 insertions(+)
 create mode 100644 src/scribe_data/language_data_extraction/Chinese/Mandarin/Prepositions/query_prepositions.sparql
 create mode 100644 src/scribe_data/language_data_extraction/Chinese/Mandarin/adjectives/query_adjectives.sparql
 create mode 100644 src/scribe_data/language_data_extraction/Chinese/Mandarin/adverbs/query_adverbs.sparql
 create mode 100644 src/scribe_data/language_data_extraction/Chinese/Mandarin/emoji_keywords/__init__.py
 create mode 100644 src/scribe_data/language_data_extraction/Chinese/Mandarin/emoji_keywords/generate_emoji_keywords.py

diff --git a/src/scribe_data/language_data_extraction/Chinese/Mandarin/Prepositions/query_prepositions.sparql b/src/scribe_data/language_data_extraction/Chinese/Mandarin/Prepositions/query_prepositions.sparql
new file mode 100644
index 000000000..f34db8f8b
--- /dev/null
+++ b/src/scribe_data/language_data_extraction/Chinese/Mandarin/Prepositions/query_prepositions.sparql
@@ -0,0 +1,13 @@
+# tool: scribe-data
+# All Standard Mandarin Chinese (Q1412) prepositions.
+# Enter this query at https://query.wikidata.org/.
+
+SELECT
+  (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") AS ?lexemeID)
+  ?preposition
+
+WHERE {
+  ?lexeme dct:language wd:Q727694 ;
+    wikibase:lexicalCategory wd:Q4833830 ;
+    wikibase:lemma ?preposition .
+}
diff --git a/src/scribe_data/language_data_extraction/Chinese/Mandarin/adjectives/query_adjectives.sparql b/src/scribe_data/language_data_extraction/Chinese/Mandarin/adjectives/query_adjectives.sparql
new file mode 100644
index 000000000..75f5f6df3
--- /dev/null
+++ b/src/scribe_data/language_data_extraction/Chinese/Mandarin/adjectives/query_adjectives.sparql
@@ -0,0 +1,13 @@
+# tool: scribe-data
+# All Mandarin Chinese (Q727694) adjectives.
+# Enter this query at https://query.wikidata.org/.
+
+SELECT
+  (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") AS ?lexemeID)
+  ?adjective
+
+WHERE {
+  ?lexeme dct:language wd:Q727694 ;
+    wikibase:lexicalCategory wd:Q34698 ;
+    wikibase:lemma ?adjective .
+}
diff --git a/src/scribe_data/language_data_extraction/Chinese/Mandarin/adverbs/query_adverbs.sparql b/src/scribe_data/language_data_extraction/Chinese/Mandarin/adverbs/query_adverbs.sparql
new file mode 100644
index 000000000..c22972442
--- /dev/null
+++ b/src/scribe_data/language_data_extraction/Chinese/Mandarin/adverbs/query_adverbs.sparql
@@ -0,0 +1,13 @@
+# tool: scribe-data
+# All Mandarin Chinese (Q727694) adverb.
+# Enter this query at https://query.wikidata.org/.
+
+SELECT
+  (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") AS ?lexemeID)
+  ?adverb
+
+WHERE {
+  ?lexeme dct:language wd:Q727694 ;
+    wikibase:lexicalCategory wd:Q380057  ;
+    wikibase:lemma ?adverb.
+}
diff --git a/src/scribe_data/language_data_extraction/Chinese/Mandarin/emoji_keywords/__init__.py b/src/scribe_data/language_data_extraction/Chinese/Mandarin/emoji_keywords/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/src/scribe_data/language_data_extraction/Chinese/Mandarin/emoji_keywords/generate_emoji_keywords.py b/src/scribe_data/language_data_extraction/Chinese/Mandarin/emoji_keywords/generate_emoji_keywords.py
new file mode 100644
index 000000000..fb1e509b1
--- /dev/null
+++ b/src/scribe_data/language_data_extraction/Chinese/Mandarin/emoji_keywords/generate_emoji_keywords.py
@@ -0,0 +1,46 @@
+"""
+Generates keyword-emoji relationships from a selection of Mandarin Chinese words.
+
+.. raw:: html
+    <!--
+    * Copyright (C) 2024 Scribe
+    *
+    * This program is free software: you can redistribute it and/or modify
+    * it under the terms of the GNU General Public License as published by
+    * the Free Software Foundation, either version 3 of the License, or
+    * (at your option) any later version.
+    *
+    * This program is distributed in the hope that it will be useful,
+    * but WITHOUT ANY WARRANTY; without even the implied warranty of
+    * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    * GNU General Public License for more details.
+    *
+    * You should have received a copy of the GNU General Public License
+    * along with this program.  If not, see <https://www.gnu.org/licenses/>.
+    -->
+"""
+
+import argparse
+
+from scribe_data.unicode.process_unicode import gen_emoji_lexicon
+from scribe_data.utils import export_formatted_data
+
+LANGUAGE = "Standard Mandarin"
+DATA_TYPE = "emoji-keywords"
+emojis_per_keyword = 3
+
+parser = argparse.ArgumentParser()
+parser.add_argument("--file-path")
+args = parser.parse_args()
+
+if emoji_keywords_dict := gen_emoji_lexicon(
+    language=LANGUAGE,
+    emojis_per_keyword=emojis_per_keyword,
+):
+    export_formatted_data(
+        file_path=args.file_path,
+        formatted_data=emoji_keywords_dict,
+        query_data_in_use=True,
+        language=LANGUAGE,
+        data_type=DATA_TYPE,
+    )

From 5a383f2b9a1853b60eb758274660aaffd13df8f5 Mon Sep 17 00:00:00 2001
From: Veronica Waiganjo <veronicahwags@gmail.com>
Date: Tue, 15 Oct 2024 15:25:29 +0300
Subject: [PATCH 10/36] Update Mandarin prepositions query

---
 .../Chinese/Mandarin/Prepositions/query_prepositions.sparql     | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/scribe_data/language_data_extraction/Chinese/Mandarin/Prepositions/query_prepositions.sparql b/src/scribe_data/language_data_extraction/Chinese/Mandarin/Prepositions/query_prepositions.sparql
index f34db8f8b..4757b637f 100644
--- a/src/scribe_data/language_data_extraction/Chinese/Mandarin/Prepositions/query_prepositions.sparql
+++ b/src/scribe_data/language_data_extraction/Chinese/Mandarin/Prepositions/query_prepositions.sparql
@@ -1,5 +1,5 @@
 # tool: scribe-data
-# All Standard Mandarin Chinese (Q1412) prepositions.
+# All Standard Mandarin Chinese (Q727694) prepositions.
 # Enter this query at https://query.wikidata.org/.
 
 SELECT

From 1942d0989fe9a53593bb7e34dcd3b451563f9e68 Mon Sep 17 00:00:00 2001
From: Veronica Waiganjo <veronicahwags@gmail.com>
Date: Tue, 15 Oct 2024 16:32:05 +0300
Subject: [PATCH 11/36] Remove Mandarin Adverbs directory

---
 .../Chinese/Mandarin/adverbs/query_adverbs.sparql   | 13 -------------
 1 file changed, 13 deletions(-)
 delete mode 100644 src/scribe_data/language_data_extraction/Chinese/Mandarin/adverbs/query_adverbs.sparql

diff --git a/src/scribe_data/language_data_extraction/Chinese/Mandarin/adverbs/query_adverbs.sparql b/src/scribe_data/language_data_extraction/Chinese/Mandarin/adverbs/query_adverbs.sparql
deleted file mode 100644
index c22972442..000000000
--- a/src/scribe_data/language_data_extraction/Chinese/Mandarin/adverbs/query_adverbs.sparql
+++ /dev/null
@@ -1,13 +0,0 @@
-# tool: scribe-data
-# All Mandarin Chinese (Q727694) adverb.
-# Enter this query at https://query.wikidata.org/.
-
-SELECT
-  (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") AS ?lexemeID)
-  ?adverb
-
-WHERE {
-  ?lexeme dct:language wd:Q727694 ;
-    wikibase:lexicalCategory wd:Q380057  ;
-    wikibase:lemma ?adverb.
-}

From 3d505a76c082943195fef21cf10fa064eafd6907 Mon Sep 17 00:00:00 2001
From: Angel osim <69635048+Otom-obhazi@users.noreply.github.com>
Date: Tue, 15 Oct 2024 14:39:56 +0100
Subject: [PATCH 12/36] Create query_adverbs.sparql

adverb for Tajik
---
 .../Tajik/Adverbs/query_adverbs.sparql              | 13 +++++++++++++
 1 file changed, 13 insertions(+)
 create mode 100644 src/scribe_data/language_data_extraction/Tajik/Adverbs/query_adverbs.sparql

diff --git a/src/scribe_data/language_data_extraction/Tajik/Adverbs/query_adverbs.sparql b/src/scribe_data/language_data_extraction/Tajik/Adverbs/query_adverbs.sparql
new file mode 100644
index 000000000..b0d714b01
--- /dev/null
+++ b/src/scribe_data/language_data_extraction/Tajik/Adverbs/query_adverbs.sparql
@@ -0,0 +1,13 @@
+# tool: scribe-data
+# All Tajik (Q9260) adverbs.
+# Enter this query at https://query.wikidata.org/.
+
+SELECT DISTINCT
+  (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") AS ?lexemeID)
+  ?adverb
+
+WHERE {
+  ?lexeme dct:language wd:Q9260 ;
+    wikibase:lexicalCategory wd:Q380057 ;
+    wikibase:lemma ?adverb .
+}

From a871de31d150fdde8d66c18b087d43a05e91d886 Mon Sep 17 00:00:00 2001
From: Angel osim <69635048+Otom-obhazi@users.noreply.github.com>
Date: Tue, 15 Oct 2024 15:26:34 +0100
Subject: [PATCH 13/36] Create generate_emoji_keywords.py

Creating emoji_keywords
---
 .../emoji_keywords/generate_emoji_keywords.py | 46 +++++++++++++++++++
 1 file changed, 46 insertions(+)
 create mode 100644 src/scribe_data/language_data_extraction/Slovak/emoji_keywords/generate_emoji_keywords.py

diff --git a/src/scribe_data/language_data_extraction/Slovak/emoji_keywords/generate_emoji_keywords.py b/src/scribe_data/language_data_extraction/Slovak/emoji_keywords/generate_emoji_keywords.py
new file mode 100644
index 000000000..2b0baa7d3
--- /dev/null
+++ b/src/scribe_data/language_data_extraction/Slovak/emoji_keywords/generate_emoji_keywords.py
@@ -0,0 +1,46 @@
+"""
+Generates keyword-emoji relationships from a selection of Slovak words.
+
+.. raw:: html
+    <!--
+    * Copyright (C) 2024 Scribe
+    *
+    * This program is free software: you can redistribute it and/or modify
+    * it under the terms of the GNU General Public License as published by
+    * the Free Software Foundation, either version 3 of the License, or
+    * (at your option) any later version.
+    *
+    * This program is distributed in the hope that it will be useful,
+    * but WITHOUT ANY WARRANTY; without even the implied warranty of
+    * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    * GNU General Public License for more details.
+    *
+    * You should have received a copy of the GNU General Public License
+    * along with this program.  If not, see <https://www.gnu.org/licenses/>.
+    -->
+"""
+
+import argparse
+
+from scribe_data.unicode.process_unicode import gen_emoji_lexicon
+from scribe_data.utils import export_formatted_data
+
+LANGUAGE = "Slovak"
+DATA_TYPE = "emoji-keywords"
+emojis_per_keyword = 3
+
+parser = argparse.ArgumentParser()
+parser.add_argument("--file-path")
+args = parser.parse_args()
+
+if emoji_keywords_dict := gen_emoji_lexicon(
+    language=LANGUAGE,
+    emojis_per_keyword=emojis_per_keyword,
+):
+    export_formatted_data(
+        file_path=args.file_path,
+        formatted_data=emoji_keywords_dict,
+        query_data_in_use=True,
+        language=LANGUAGE,
+        data_type=DATA_TYPE,
+    )

From 318cceb757718e00cde73ed5a681c265f53a0852 Mon Sep 17 00:00:00 2001
From: Andrew Tavis McAllister <andrew.t.mcallister@gmail.com>
Date: Tue, 15 Oct 2024 18:41:57 +0200
Subject: [PATCH 14/36] Add missing init file

---
 .../language_data_extraction/Slovak/emoji_keywords/__init__.py    | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 create mode 100644 src/scribe_data/language_data_extraction/Slovak/emoji_keywords/__init__.py

diff --git a/src/scribe_data/language_data_extraction/Slovak/emoji_keywords/__init__.py b/src/scribe_data/language_data_extraction/Slovak/emoji_keywords/__init__.py
new file mode 100644
index 000000000..e69de29bb

From 52b74268244dc978c78ffe93aebea3f2d907a37d Mon Sep 17 00:00:00 2001
From: Angel osim <69635048+Otom-obhazi@users.noreply.github.com>
Date: Tue, 15 Oct 2024 16:03:04 +0100
Subject: [PATCH 15/36] Create query_adverbs.sparql

Adverb for Basque
---
 .../Basque/Adverbs/query_adverbs.sparql             | 13 +++++++++++++
 1 file changed, 13 insertions(+)
 create mode 100644 src/scribe_data/language_data_extraction/Basque/Adverbs/query_adverbs.sparql

diff --git a/src/scribe_data/language_data_extraction/Basque/Adverbs/query_adverbs.sparql b/src/scribe_data/language_data_extraction/Basque/Adverbs/query_adverbs.sparql
new file mode 100644
index 000000000..1cc1a63c9
--- /dev/null
+++ b/src/scribe_data/language_data_extraction/Basque/Adverbs/query_adverbs.sparql
@@ -0,0 +1,13 @@
+# tool: scribe-data
+# All Basque (Q8752) adverbs.
+# Enter this query at https://query.wikidata.org/.
+
+SELECT
+  (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") AS ?lexemeID)
+  ?adverb
+
+WHERE {
+  ?lexeme dct:language wd:Q8752;
+    wikibase:lexicalCategory wd:Q380057 ;
+    wikibase:lemma ?adverb .
+}

From e16dc242b6454b2e3a0fd1c932a8e9ec9447b23c Mon Sep 17 00:00:00 2001
From: Andrew Tavis McAllister <andrew.t.mcallister@gmail.com>
Date: Tue, 15 Oct 2024 18:46:46 +0200
Subject: [PATCH 16/36] Rename adverb directory

---
 .../Basque/{Adverbs => adverbs}/query_adverbs.sparql              | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename src/scribe_data/language_data_extraction/Basque/{Adverbs => adverbs}/query_adverbs.sparql (100%)

diff --git a/src/scribe_data/language_data_extraction/Basque/Adverbs/query_adverbs.sparql b/src/scribe_data/language_data_extraction/Basque/adverbs/query_adverbs.sparql
similarity index 100%
rename from src/scribe_data/language_data_extraction/Basque/Adverbs/query_adverbs.sparql
rename to src/scribe_data/language_data_extraction/Basque/adverbs/query_adverbs.sparql

From e0f0598096652dbedc0fb1d35b74228fbf74d6de Mon Sep 17 00:00:00 2001
From: Arpita kesharwani <107834813+KesharwaniArpita@users.noreply.github.com>
Date: Tue, 15 Oct 2024 22:48:58 +0530
Subject: [PATCH 17/36] Create query_adjectives_1.sparql

---
 .../adjectives/query_adjectives_1.sparql      | 22 +++++++++++++++++++
 1 file changed, 22 insertions(+)
 create mode 100644 src/scribe_data/language_data_extraction/Czech/adjectives/query_adjectives_1.sparql

diff --git a/src/scribe_data/language_data_extraction/Czech/adjectives/query_adjectives_1.sparql b/src/scribe_data/language_data_extraction/Czech/adjectives/query_adjectives_1.sparql
new file mode 100644
index 000000000..1eba99f95
--- /dev/null
+++ b/src/scribe_data/language_data_extraction/Czech/adjectives/query_adjectives_1.sparql
@@ -0,0 +1,22 @@
+# tool: scribe-data
+# All Czech (Q9056) adjectives in the given cases.
+# Enter this query at https://query.wikidata.org/.
+
+SELECT
+  (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") AS ?lexemeID)
+  ?adjective
+  ?nominative
+
+WHERE {
+  ?lexeme dct:language wd:Q9056 ;
+    wikibase:lexicalCategory wd:Q34698 ;
+    wikibase:lemma ?adjective .
+
+  # MARK: Nominative
+
+  OPTIONAL {
+    ?lexeme ontolex:lexicalForm ?nominativeForm .
+    ?nominativeForm ontolex:representation ?nominative ;
+      wikibase:grammaticalFeature wd:Q131105 .
+  } .
+}

From 51d1f1d171b1e4c8cef805a77a431d782ac4776e Mon Sep 17 00:00:00 2001
From: Arpita kesharwani <107834813+KesharwaniArpita@users.noreply.github.com>
Date: Tue, 15 Oct 2024 22:51:23 +0530
Subject: [PATCH 18/36] Create query_adjective_2.sparql

---
 .../Czech/adjectives/query_adjective_2.sparql | 22 +++++++++++++++++++
 1 file changed, 22 insertions(+)
 create mode 100644 src/scribe_data/language_data_extraction/Czech/adjectives/query_adjective_2.sparql

diff --git a/src/scribe_data/language_data_extraction/Czech/adjectives/query_adjective_2.sparql b/src/scribe_data/language_data_extraction/Czech/adjectives/query_adjective_2.sparql
new file mode 100644
index 000000000..43e34962f
--- /dev/null
+++ b/src/scribe_data/language_data_extraction/Czech/adjectives/query_adjective_2.sparql
@@ -0,0 +1,22 @@
+# tool: scribe-data
+# All Czech (Q9056) adjectives in the given cases.
+# Enter this query at https://query.wikidata.org/.
+
+SELECT
+  (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") AS ?lexemeID)
+  ?adjective
+  ?genitive
+  
+WHERE {
+  ?lexeme dct:language wd:Q9056 ;
+    wikibase:lexicalCategory wd:Q34698 ;
+    wikibase:lemma ?adjective .
+
+  # MARK: Genitive
+
+  OPTIONAL {
+    ?lexeme ontolex:lexicalForm ?genitiveForm .
+    ?genitiveForm ontolex:representation ?genitive ;
+      wikibase:grammaticalFeature wd:Q146233 .
+  } .
+}

From cc7b9e67559fa1c1e69002941141ad8f3ebb892c Mon Sep 17 00:00:00 2001
From: Arpita kesharwani <107834813+KesharwaniArpita@users.noreply.github.com>
Date: Tue, 15 Oct 2024 23:05:29 +0530
Subject: [PATCH 19/36] Create query_adjectives_3.sparql

---
 .../adjectives/query_adjectives_3.sparql      | 22 +++++++++++++++++++
 1 file changed, 22 insertions(+)
 create mode 100644 src/scribe_data/language_data_extraction/Czech/adjectives/query_adjectives_3.sparql

diff --git a/src/scribe_data/language_data_extraction/Czech/adjectives/query_adjectives_3.sparql b/src/scribe_data/language_data_extraction/Czech/adjectives/query_adjectives_3.sparql
new file mode 100644
index 000000000..3be851852
--- /dev/null
+++ b/src/scribe_data/language_data_extraction/Czech/adjectives/query_adjectives_3.sparql
@@ -0,0 +1,22 @@
+# tool: scribe-data
+# All Czech (Q9056) adjectives in the given cases.
+# Enter this query at https://query.wikidata.org/.
+
+SELECT
+  (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") AS ?lexemeID)
+  ?adjective
+  ?locative
+
+WHERE {
+  ?lexeme dct:language wd:Q9056 ;
+    wikibase:lexicalCategory wd:Q34698 ;
+    wikibase:lemma ?adjective .
+
+  # MARK: Locative
+
+  OPTIONAL {
+    ?lexeme ontolex:lexicalForm ?locativeForm .
+    ?locativeForm ontolex:representation ?locative ;
+      wikibase:grammaticalFeature wd:Q202142 .
+  } .
+}

From 2fc8ed778bafeb7516880713bb1b1fdbb28207fe Mon Sep 17 00:00:00 2001
From: Arpita kesharwani <107834813+KesharwaniArpita@users.noreply.github.com>
Date: Tue, 15 Oct 2024 23:05:50 +0530
Subject: [PATCH 20/36] Rename query_adjective_2.sparql to
 query_adjectives_2.sparql

---
 .../{query_adjective_2.sparql => query_adjectives_2.sparql}       | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename src/scribe_data/language_data_extraction/Czech/adjectives/{query_adjective_2.sparql => query_adjectives_2.sparql} (100%)

diff --git a/src/scribe_data/language_data_extraction/Czech/adjectives/query_adjective_2.sparql b/src/scribe_data/language_data_extraction/Czech/adjectives/query_adjectives_2.sparql
similarity index 100%
rename from src/scribe_data/language_data_extraction/Czech/adjectives/query_adjective_2.sparql
rename to src/scribe_data/language_data_extraction/Czech/adjectives/query_adjectives_2.sparql

From 0bd670eb2de1fef13836fc0967f67561f8658306 Mon Sep 17 00:00:00 2001
From: Arpita kesharwani <107834813+KesharwaniArpita@users.noreply.github.com>
Date: Tue, 15 Oct 2024 23:10:01 +0530
Subject: [PATCH 21/36] Create query_adverbs.sparql

---
 .../Czech/adverbs/query_adverbs.sparql              | 13 +++++++++++++
 1 file changed, 13 insertions(+)
 create mode 100644 src/scribe_data/language_data_extraction/Czech/adverbs/query_adverbs.sparql

diff --git a/src/scribe_data/language_data_extraction/Czech/adverbs/query_adverbs.sparql b/src/scribe_data/language_data_extraction/Czech/adverbs/query_adverbs.sparql
new file mode 100644
index 000000000..913ebbc3e
--- /dev/null
+++ b/src/scribe_data/language_data_extraction/Czech/adverbs/query_adverbs.sparql
@@ -0,0 +1,13 @@
+# tool: scribe-data
+# All Czech (Q9056) adverbs.
+# Enter this query at https://query.wikidata.org/.
+
+SELECT
+  (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") AS ?lexemeID)
+  ?adverb
+
+WHERE {
+  ?lexeme dct:language wd:Q9056 ;
+    wikibase:lexicalCategory wd:Q380057 ;
+    wikibase:lemma ?adverb .
+}

From f276d16e24c2f8ea73f764ede84cb533c7158d75 Mon Sep 17 00:00:00 2001
From: Arpita kesharwani <107834813+KesharwaniArpita@users.noreply.github.com>
Date: Tue, 15 Oct 2024 23:23:00 +0530
Subject: [PATCH 22/36] Create generate_emoji_keywords.py

---
 .../emoji_keywords/generate_emoji_keywords.py | 47 +++++++++++++++++++
 1 file changed, 47 insertions(+)
 create mode 100644 src/scribe_data/language_data_extraction/Czech/emoji_keywords/generate_emoji_keywords.py

diff --git a/src/scribe_data/language_data_extraction/Czech/emoji_keywords/generate_emoji_keywords.py b/src/scribe_data/language_data_extraction/Czech/emoji_keywords/generate_emoji_keywords.py
new file mode 100644
index 000000000..0723195e4
--- /dev/null
+++ b/src/scribe_data/language_data_extraction/Czech/emoji_keywords/generate_emoji_keywords.py
@@ -0,0 +1,47 @@
+
+"""
+Generates keyword-emoji relationships from a selection of Czech words.
+
+.. raw:: html
+    <!--
+    * Copyright (C) 2024 Scribe
+    *
+    * This program is free software: you can redistribute it and/or modify
+    * it under the terms of the GNU General Public License as published by
+    * the Free Software Foundation, either version 3 of the License, or
+    * (at your option) any later version.
+    *
+    * This program is distributed in the hope that it will be useful,
+    * but WITHOUT ANY WARRANTY; without even the implied warranty of
+    * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    * GNU General Public License for more details.
+    *
+    * You should have received a copy of the GNU General Public License
+    * along with this program.  If not, see <https://www.gnu.org/licenses/>.
+    -->
+"""
+
+import argparse
+
+from scribe_data.unicode.process_unicode import gen_emoji_lexicon
+from scribe_data.utils import export_formatted_data
+
+LANGUAGE = "Czech"
+DATA_TYPE = "emoji-keywords"
+emojis_per_keyword = 3
+
+parser = argparse.ArgumentParser()
+parser.add_argument("--file-path")
+args = parser.parse_args()
+
+if emoji_keywords_dict := gen_emoji_lexicon(
+    language=LANGUAGE,
+    emojis_per_keyword=emojis_per_keyword,
+):
+    export_formatted_data(
+        file_path=args.file_path,
+        formatted_data=emoji_keywords_dict,
+        query_data_in_use=True,
+        language=LANGUAGE,
+        data_type=DATA_TYPE,
+    )

From a5779515dbf3fb85804712fc5996c338eb90b9b8 Mon Sep 17 00:00:00 2001
From: Andrew Tavis McAllister <andrew.t.mcallister@gmail.com>
Date: Tue, 15 Oct 2024 21:36:01 +0200
Subject: [PATCH 23/36] Add forms to adverbs query

---
 .../Czech/adverbs/query_adverbs.sparql        | 20 ++++++++++++++++++-
 .../Czech/emoji_keywords/__init__.py          |  0
 2 files changed, 19 insertions(+), 1 deletion(-)
 create mode 100644 src/scribe_data/language_data_extraction/Czech/emoji_keywords/__init__.py

diff --git a/src/scribe_data/language_data_extraction/Czech/adverbs/query_adverbs.sparql b/src/scribe_data/language_data_extraction/Czech/adverbs/query_adverbs.sparql
index 913ebbc3e..693955f2b 100644
--- a/src/scribe_data/language_data_extraction/Czech/adverbs/query_adverbs.sparql
+++ b/src/scribe_data/language_data_extraction/Czech/adverbs/query_adverbs.sparql
@@ -1,13 +1,31 @@
 # tool: scribe-data
-# All Czech (Q9056) adverbs.
+# All Czech (Q9056) adverbs in the given cases.
 # Enter this query at https://query.wikidata.org/.
 
 SELECT
   (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") AS ?lexemeID)
   ?adverb
+  ?comparative
+  ?superlative
 
 WHERE {
   ?lexeme dct:language wd:Q9056 ;
     wikibase:lexicalCategory wd:Q380057 ;
     wikibase:lemma ?adverb .
+
+  # MARK: Comparative
+
+  OPTIONAL {
+    ?lexeme ontolex:lexicalForm ?comparativeForm .
+    ?comparativeForm ontolex:representation ?comparative ;
+      wikibase:grammaticalFeature wd:Q14169499 .
+  }
+
+  # MARK: Superlative
+
+  OPTIONAL {
+    ?lexeme ontolex:lexicalForm ?superlativeForm .
+    ?superlativeForm ontolex:representation ?superlative ;
+      wikibase:grammaticalFeature wd:Q1817208 .
+  }
 }
diff --git a/src/scribe_data/language_data_extraction/Czech/emoji_keywords/__init__.py b/src/scribe_data/language_data_extraction/Czech/emoji_keywords/__init__.py
new file mode 100644
index 000000000..e69de29bb

From adc061f1550009ee422ea9470603cd3045d4253d Mon Sep 17 00:00:00 2001
From: Omar Agiez <omaragiez3@gmail.com>
Date: Tue, 15 Oct 2024 20:30:54 +0300
Subject: [PATCH 24/36] adding a sparql file in Tamil/adverbs for Tamil adverbs

---
 .../language_data_extraction/Tamil/adverbs/query_adverbs.sparql   | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 create mode 100644 src/scribe_data/language_data_extraction/Tamil/adverbs/query_adverbs.sparql

diff --git a/src/scribe_data/language_data_extraction/Tamil/adverbs/query_adverbs.sparql b/src/scribe_data/language_data_extraction/Tamil/adverbs/query_adverbs.sparql
new file mode 100644
index 000000000..e69de29bb

From 7d0195bc55b31a79e64755c9b4f905414189b4c7 Mon Sep 17 00:00:00 2001
From: Omar Agiez <omaragiez3@gmail.com>
Date: Tue, 15 Oct 2024 20:32:51 +0300
Subject: [PATCH 25/36] simple sparql query for fetching Tamil adverbs from
 wikidata

---
 .../Tamil/adverbs/query_adverbs.sparql              | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/src/scribe_data/language_data_extraction/Tamil/adverbs/query_adverbs.sparql b/src/scribe_data/language_data_extraction/Tamil/adverbs/query_adverbs.sparql
index e69de29bb..86a7a8eb9 100644
--- a/src/scribe_data/language_data_extraction/Tamil/adverbs/query_adverbs.sparql
+++ b/src/scribe_data/language_data_extraction/Tamil/adverbs/query_adverbs.sparql
@@ -0,0 +1,13 @@
+# tool: scribe-data
+# All Tamil (Q5885) adverbs.
+# Enter this query at https://query.wikidata.org/.
+
+SELECT
+  (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") AS ?lexemeID)
+  ?adverb
+
+WHERE {
+  ?lexeme dct:language wd:Q5885 ;
+    wikibase:lexicalCategory wd:Q380057 ;
+    wikibase:lemma ?adverb .
+}

From 7c3b037ff4c3612910752c854dcd9de996dc5eff Mon Sep 17 00:00:00 2001
From: Andrew Tavis McAllister <andrew.t.mcallister@gmail.com>
Date: Tue, 15 Oct 2024 21:40:03 +0200
Subject: [PATCH 26/36] Add vocative

---
 .../Tamil/adverbs/query_adverbs.sparql                | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/src/scribe_data/language_data_extraction/Tamil/adverbs/query_adverbs.sparql b/src/scribe_data/language_data_extraction/Tamil/adverbs/query_adverbs.sparql
index 86a7a8eb9..72e2a4a96 100644
--- a/src/scribe_data/language_data_extraction/Tamil/adverbs/query_adverbs.sparql
+++ b/src/scribe_data/language_data_extraction/Tamil/adverbs/query_adverbs.sparql
@@ -1,13 +1,22 @@
 # tool: scribe-data
-# All Tamil (Q5885) adverbs.
+# All Tamil (Q5885) adverbs in the given case.
 # Enter this query at https://query.wikidata.org/.
 
 SELECT
   (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") AS ?lexemeID)
   ?adverb
+  ?vocative
 
 WHERE {
   ?lexeme dct:language wd:Q5885 ;
     wikibase:lexicalCategory wd:Q380057 ;
     wikibase:lemma ?adverb .
+
+  # MARK: Vocative
+
+  OPTIONAL {
+    ?lexeme ontolex:lexicalForm ?vocativeForm .
+    ?vocativeForm ontolex:representation ?vocative ;
+      wikibase:grammaticalFeature wd:Q185077 .
+  }
 }

From ae2e662873e923aa10cb8c6f372d19f307a8b262 Mon Sep 17 00:00:00 2001
From: axif <muhamadasif570@gmail.com>
Date: Tue, 15 Oct 2024 23:06:57 +0600
Subject: [PATCH 27/36] fix lists of arguments to be validated

---
 src/scribe_data/cli/cli_utils.py | 142 +++++++++++++++++--------------
 src/scribe_data/cli/main.py      |  13 ++-
 tests/cli/test_utils.py          |  42 ++++++++-
 3 files changed, 127 insertions(+), 70 deletions(-)

diff --git a/src/scribe_data/cli/cli_utils.py b/src/scribe_data/cli/cli_utils.py
index e3e62485c..8de5c7dec 100644
--- a/src/scribe_data/cli/cli_utils.py
+++ b/src/scribe_data/cli/cli_utils.py
@@ -23,7 +23,7 @@
 import difflib
 import json
 from pathlib import Path
-from typing import Union
+from typing import Union, List
 
 from scribe_data.utils import DEFAULT_JSON_EXPORT_DIR
 
@@ -155,79 +155,91 @@ def print_formatted_data(data: Union[dict, list], data_type: str) -> None:
 # MARK: Validate
 
 
-def validate_language_and_data_type(language: str, data_type: str):
+def validate_language_and_data_type(
+    language: Union[str, List[str], bool, None],
+    data_type: Union[str, List[str], bool, None],
+):
     """
     Validates that the language and data type QIDs are not None.
 
     Parameters
     ----------
-        language : str
-            The language to validate.
-
-        data_type : str
-            The data type to validate.
+    language : str or list
+        The language(s) to validate.
+    data_type : str or list
+        The data type(s) to validate.
 
     Raises
     ------
-        ValueError
-            If either the language or data type is invalid (None).
+    ValueError
+        If any of the languages or data types is invalid, with all errors reported together.
     """
-    # Not functional for lists of arguments yet.
-    if isinstance(language, list) or isinstance(data_type, list):
-        return
-
-    language_is_valid = True
-    data_type_is_valid = True
-
-    value_error = ""
-    closest_language_match_string = ""
-    closest_data_type_match_string = ""
-
-    if (
-        isinstance(language, str)
-        and language.lower() not in language_to_qid.keys()
-        and not language.startswith("Q")
-        and not language[1:].isdigit()
-    ):
-        language_is_valid = False
-        if closest_language_match := difflib.get_close_matches(
-            language, language_map.keys(), n=1
-        ):
-            closest_language_match_cap = closest_language_match[0].capitalize()
-            closest_language_match_string = (
-                f" The closest matching language is {closest_language_match_cap}."
-            )
-
-    if (
-        isinstance(data_type, str)
-        and data_type not in data_type_metadata.keys()
-        and not data_type.startswith("Q")
-        and not data_type[1:].isdigit()
-    ):
-        data_type_is_valid = False
 
-        if closest_data_type_match := difflib.get_close_matches(
-            data_type, data_type_metadata.keys(), n=1
+    def validate_single_item(item, valid_options, item_type):
+        """
+        Validates a single item against a list of valid options, providing error messages and suggestions.
+
+        Parameters
+        ----------
+        item : str
+            The item to validate.
+        valid_options : list
+            A list of valid options against which the item will be validated.
+        item_type : str
+            A description of the item type (e.g., "language", "data-type") used in error messages.
+
+        Returns
+        -------
+        str or None
+            Returns an error message if the item is invalid, or None if the item is valid.
+        """
+        if (
+            isinstance(item, str)
+            and item.lower().strip() not in valid_options
+            and not item.startswith("Q")
+            and not item[1:].isdigit()
         ):
-            closest_data_type_match_string = (
-                f" The closest matching data-type is {closest_data_type_match[0]}."
+            closest_match = difflib.get_close_matches(item, valid_options, n=1)
+            closest_match_str = (
+                f" The closest matching {item_type} is {closest_match[0]}"
+                if closest_match
+                else ""
             )
-
-    if not language_is_valid and data_type_is_valid:
-        value_error = (
-            f"Invalid language {language} passed.{closest_language_match_string}"
-        )
-
-        raise ValueError(value_error)
-
-    elif language_is_valid and not data_type_is_valid:
-        value_error = (
-            f"Invalid data-type {data_type} passed.{closest_data_type_match_string}"
-        )
-
-        raise ValueError(value_error)
-
-    elif not language_is_valid and not data_type_is_valid:
-        value_error = f"Invalid language {language} and data-type {data_type} passed.{closest_language_match_string}{closest_data_type_match_string}"
-
-        raise ValueError(value_error)
+            return f"Invalid {item_type} {item}{closest_match_str}"
+        return None
+
+    errors = []
+
+    # Handle language validation
+    if language is None or isinstance(language, bool):
+        pass
+    elif isinstance(language, str):
+        language = [language]
+    elif not isinstance(language, list):
+        errors.append("Language must be a string or a list of strings.")
+
+    if language is not None and isinstance(language, list):
+        for lang in language:
+            error = validate_single_item(lang, language_to_qid.keys(), "language")
+            if error:
+                errors.append(error)
+
+    # Handle data type validation
+    if data_type is None or isinstance(data_type, bool):
+        pass
+    elif isinstance(data_type, str):
+        data_type = [data_type]
+    elif not isinstance(data_type, list):
+        errors.append("Data type must be a string or a list of strings.")
+
+    if data_type is not None and isinstance(data_type, list):
+        for dt in data_type:
+            error = validate_single_item(dt, data_type_metadata.keys(), "data-type")
+            if error:
+                errors.append(error)
+
+    # Raise ValueError with the combined error message
+    if errors:
+        raise ValueError(" and ".join(errors) + " passed.")
+    else:
+        return True
diff --git a/src/scribe_data/cli/main.py b/src/scribe_data/cli/main.py
index 7c88485a2..1cf4758a0 100644
--- a/src/scribe_data/cli/main.py
+++ b/src/scribe_data/cli/main.py
@@ -201,10 +201,15 @@ def main() -> None:
     # MARK: Setup CLI
 
     args = parser.parse_args()
-    if args.language or args.data_type:
-        validate_language_and_data_type(
-            language=args.language, data_type=args.data_type
-        )
+
+    try:
+        if args.language or args.data_type:
+            validate_language_and_data_type(
+                language=args.language, data_type=args.data_type
+            )
+    except ValueError as e:
+        print(e)
+        return
 
     if args.upgrade:
         upgrade_cli()
diff --git a/tests/cli/test_utils.py b/tests/cli/test_utils.py
index 149716c2d..32ab82262 100644
--- a/tests/cli/test_utils.py
+++ b/tests/cli/test_utils.py
@@ -216,5 +216,45 @@ def test_validate_language_and_data_type_both_invalid(self, mock_get_qid):
 
         self.assertEqual(
             str(context.exception),
-            "Invalid language InvalidLanguage and data-type InvalidDataType passed.",
+            "Invalid language InvalidLanguage and Invalid data-type InvalidDataType passed.",
         )
+
+    def test_validate_language_and_data_type_with_list(self):
+        """Test validation with lists of languages and data types."""
+        languages = ["English", "Spanish"]
+        data_types = ["nouns", "verbs"]
+        try:
+            validate_language_and_data_type(languages, data_types)
+        except ValueError:
+            self.fail(
+                "validate_language_and_data_type raised ValueError unexpectedly with valid lists!"
+            )
+
+    def test_validate_language_and_data_type_with_qids(self):
+        """Test validation directly with QIDs."""
+        language_qid = "Q1860"  # QID for English
+        data_type_qid = "Q1084"  # QID for nouns
+        try:
+            validate_language_and_data_type(language_qid, data_type_qid)
+        except ValueError:
+            self.fail(
+                "validate_language_and_data_type raised ValueError unexpectedly with valid QIDs!"
+            )
+
+    def test_validate_language_and_data_type_invalid_list(self):
+        """Test validation with invalid lists."""
+        languages = ["English", "Klingon"]
+        data_types = ["nouns", "alienverbs"]
+        with self.assertRaises(ValueError) as context:
+            validate_language_and_data_type(languages, data_types)
+        self.assertIn("Invalid language Klingon", str(context.exception))
+        self.assertIn("Invalid data-type alienverbs", str(context.exception))
+
+    def test_validate_language_and_data_type_mixed_validity_in_lists(self):
+        """Test validation with mixed valid and invalid entries in lists."""
+        languages = ["English", "InvalidLanguage"]
+        data_types = ["nouns", "InvalidDataType"]
+        with self.assertRaises(ValueError) as context:
+            validate_language_and_data_type(languages, data_types)
+        self.assertIn("Invalid language InvalidLanguage", str(context.exception))
+        self.assertIn("Invalid data-type InvalidDataType", str(context.exception))

From 3e6835c2e5b98a454516e2876e490d541b5e9dc1 Mon Sep 17 00:00:00 2001
From: Andrew Tavis McAllister <andrew.t.mcallister@gmail.com>
Date: Tue, 15 Oct 2024 22:06:17 +0200
Subject: [PATCH 28/36] Minor formatting and edits to outputs

---
 src/scribe_data/cli/cli_utils.py | 52 +++++++++++++++++++-------------
 src/scribe_data/cli/main.py      |  3 +-
 tests/cli/test_utils.py          | 24 +++++----------
 3 files changed, 41 insertions(+), 38 deletions(-)

diff --git a/src/scribe_data/cli/cli_utils.py b/src/scribe_data/cli/cli_utils.py
index 8de5c7dec..4f59a65ef 100644
--- a/src/scribe_data/cli/cli_utils.py
+++ b/src/scribe_data/cli/cli_utils.py
@@ -23,7 +23,7 @@
 import difflib
 import json
 from pathlib import Path
-from typing import Union, List
+from typing import List, Union
 
 from scribe_data.utils import DEFAULT_JSON_EXPORT_DIR
 
@@ -164,15 +164,16 @@ def validate_language_and_data_type(
 
     Parameters
     ----------
-    language : str or list
-        The language(s) to validate.
-    data_type : str or list
-        The data type(s) to validate.
+        language : str or list
+            The language(s) to validate.
+
+        data_type : str or list
+            The data type(s) to validate.
 
     Raises
     ------
-    ValueError
-        If any of the languages or data types is invalid, with all errors reported together.
+        ValueError
+            If any of the languages or data types is invalid, with all errors reported together.
     """
 
     def validate_single_item(item, valid_options, item_type):
@@ -181,17 +182,17 @@ def validate_single_item(item, valid_options, item_type):
 
         Parameters
         ----------
-        item : str
-            The item to validate.
-        valid_options : list
-            A list of valid options against which the item will be validated.
-        item_type : str
-            A description of the item type (e.g., "language", "data-type") used in error messages.
+            item : str
+                The item to validate.
+            valid_options : list
+                A list of valid options against which the item will be validated.
+            item_type : str
+                A description of the item type (e.g., "language", "data-type") used in error messages.
 
         Returns
         -------
-        str or None
-            Returns an error message if the item is invalid, or None if the item is valid.
+            str or None
+                Returns an error message if the item is invalid, or None if the item is valid.
         """
         if (
             isinstance(item, str)
@@ -201,45 +202,54 @@ def validate_single_item(item, valid_options, item_type):
         ):
             closest_match = difflib.get_close_matches(item, valid_options, n=1)
             closest_match_str = (
-                f" The closest matching {item_type} is {closest_match[0]}"
+                f" The closest matching {item_type} is {closest_match[0]}."
                 if closest_match
                 else ""
             )
-            return f"Invalid {item_type} {item}{closest_match_str}"
+
+            return f"Invalid {item_type} {item}.{closest_match_str}"
+
         return None
 
     errors = []
 
-    # Handle language validation
+    # Handle language validation.
     if language is None or isinstance(language, bool):
         pass
+
     elif isinstance(language, str):
         language = [language]
+
     elif not isinstance(language, list):
         errors.append("Language must be a string or a list of strings.")
 
     if language is not None and isinstance(language, list):
         for lang in language:
             error = validate_single_item(lang, language_to_qid.keys(), "language")
+
             if error:
                 errors.append(error)
 
-    # Handle data type validation
+    # Handle data type validation.
     if data_type is None or isinstance(data_type, bool):
         pass
+
     elif isinstance(data_type, str):
         data_type = [data_type]
+
     elif not isinstance(data_type, list):
         errors.append("Data type must be a string or a list of strings.")
 
     if data_type is not None and isinstance(data_type, list):
         for dt in data_type:
             error = validate_single_item(dt, data_type_metadata.keys(), "data-type")
+
             if error:
                 errors.append(error)
 
-    # Raise ValueError with the combined error message
+    # Raise ValueError with the combined error message.
     if errors:
-        raise ValueError(" and ".join(errors) + " passed.")
+        raise ValueError("\n".join(errors))
+
     else:
         return True
diff --git a/src/scribe_data/cli/main.py b/src/scribe_data/cli/main.py
index 1cf4758a0..506bbcdd1 100644
--- a/src/scribe_data/cli/main.py
+++ b/src/scribe_data/cli/main.py
@@ -207,8 +207,9 @@ def main() -> None:
             validate_language_and_data_type(
                 language=args.language, data_type=args.data_type
             )
+
     except ValueError as e:
-        print(e)
+        print(f"Input validation failed with error: {e}")
         return
 
     if args.upgrade:
diff --git a/tests/cli/test_utils.py b/tests/cli/test_utils.py
index 32ab82262..a827666a2 100644
--- a/tests/cli/test_utils.py
+++ b/tests/cli/test_utils.py
@@ -29,6 +29,8 @@
     validate_language_and_data_type,
 )
 
+# MARK: Utils
+
 
 class TestCLIUtils(unittest.TestCase):
     def test_correct_data_type(self):
@@ -145,6 +147,9 @@ def test_print_formatted_data_unknown_type(self):
             mock_print.assert_called_once_with("unknown data type")
 
 
+# MARK: Validate
+
+
 class TestValidateLanguageAndDataType(unittest.TestCase):
     def setUp(self):
         self.qid_mapping = {
@@ -182,9 +187,7 @@ def test_validate_language_and_data_type_invalid_language(self, mock_get_qid):
                 language=language_qid, data_type=data_type_qid
             )
 
-        self.assertEqual(
-            str(context.exception), "Invalid language InvalidLanguage passed."
-        )
+        self.assertEqual(str(context.exception), "Invalid language InvalidLanguage.")
 
     @patch("scribe_data.cli.total.get_qid_by_input")
     def test_validate_language_and_data_type_invalid_data_type(self, mock_get_qid):
@@ -198,9 +201,7 @@ def test_validate_language_and_data_type_invalid_data_type(self, mock_get_qid):
                 language=language_qid, data_type=data_type_qid
             )
 
-        self.assertEqual(
-            str(context.exception), "Invalid data-type InvalidDataType passed."
-        )
+        self.assertEqual(str(context.exception), "Invalid data-type InvalidDataType.")
 
     @patch("scribe_data.cli.total.get_qid_by_input")
     def test_validate_language_and_data_type_both_invalid(self, mock_get_qid):
@@ -216,7 +217,7 @@ def test_validate_language_and_data_type_both_invalid(self, mock_get_qid):
 
         self.assertEqual(
             str(context.exception),
-            "Invalid language InvalidLanguage and Invalid data-type InvalidDataType passed.",
+            "Invalid language InvalidLanguage.\nInvalid data-type InvalidDataType.",
         )
 
     def test_validate_language_and_data_type_with_list(self):
@@ -241,15 +242,6 @@ def test_validate_language_and_data_type_with_qids(self):
                 "validate_language_and_data_type raised ValueError unexpectedly with valid QIDs!"
             )
 
-    def test_validate_language_and_data_type_invalid_list(self):
-        """Test validation with invalid lists."""
-        languages = ["English", "Klingon"]
-        data_types = ["nouns", "alienverbs"]
-        with self.assertRaises(ValueError) as context:
-            validate_language_and_data_type(languages, data_types)
-        self.assertIn("Invalid language Klingon", str(context.exception))
-        self.assertIn("Invalid data-type alienverbs", str(context.exception))
-
     def test_validate_language_and_data_type_mixed_validity_in_lists(self):
         """Test validation with mixed valid and invalid entries in lists."""
         languages = ["English", "InvalidLanguage"]

From 343ffdb5e7cc2d7e7ee25ab505b2bc3ded41565f Mon Sep 17 00:00:00 2001
From: Purnama S Rahayu <purnamasrahayu@gmail.com>
Date: Mon, 14 Oct 2024 19:49:38 +0700
Subject: [PATCH 29/36] add workflow check_query_identifiers and dummy script
 #339

---
 .../workflows/check_query_identifiers.yaml    | 43 +++++++++++++++++++
 .../Mandarin/Adverbs/query_adverbs.sparql     | 14 ------
 2 files changed, 43 insertions(+), 14 deletions(-)
 create mode 100644 .github/workflows/check_query_identifiers.yaml
 delete mode 100644 src/scribe_data/language_data_extraction/Chinese/Mandarin/Adverbs/query_adverbs.sparql

diff --git a/.github/workflows/check_query_identifiers.yaml b/.github/workflows/check_query_identifiers.yaml
new file mode 100644
index 000000000..99300015d
--- /dev/null
+++ b/.github/workflows/check_query_identifiers.yaml
@@ -0,0 +1,43 @@
+name: check_query_identifiers.yaml
+on:
+  push:
+    branches: [main]
+  pull_request:
+    branches: [main]
+
+jobs:
+  format_check:
+    strategy:
+      fail-fast: false
+      matrix:
+        os:
+          - ubuntu-latest
+        python-version:
+          - "3.9"
+
+    runs-on: ${{ matrix.os }}
+
+    name: Run Check Query Identifiers
+
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v3
+
+      - name: Set up Python ${{ matrix.python-version }}
+        uses: actions/setup-python@v4
+        with:
+          python-version: ${{ matrix.python-version }}
+
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade uv
+          uv venv
+          uv pip install -r requirements.txt
+
+      - name: Activate virtualenv
+        run: |
+          . .venv/bin/activate
+          echo PATH=$PATH >> $GITHUB_ENV
+
+      - name: Run Python script
+        run: python src/scribe_data/check/check_query_identifiers.py
diff --git a/src/scribe_data/language_data_extraction/Chinese/Mandarin/Adverbs/query_adverbs.sparql b/src/scribe_data/language_data_extraction/Chinese/Mandarin/Adverbs/query_adverbs.sparql
deleted file mode 100644
index 8633280f4..000000000
--- a/src/scribe_data/language_data_extraction/Chinese/Mandarin/Adverbs/query_adverbs.sparql
+++ /dev/null
@@ -1,14 +0,0 @@
-# tool: scribe-data
-# All Standard Mandarin Chinese (Q727694) adverbs.
-# Enter this query at https://query.wikidata.org/.
-
-SELECT
-  (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") AS ?lexemeID)
-  ?adverb
-
-WHERE {
-  ?lexeme dct:language wd:Q727694 ;
-    wikibase:lexicalCategory wd:Q380057 ;
-    wikibase:lemma ?adverb .
-    FILTER(LANG(?adverb) = "zh") .
-}

From 230fa58f00a0762e8411291e9b8922f51ad72f7d Mon Sep 17 00:00:00 2001
From: Purnama S Rahayu <purnamasrahayu@gmail.com>
Date: Tue, 15 Oct 2024 08:53:30 +0700
Subject: [PATCH 30/36] Update workflow to trigger on future commits

---
 .github/workflows/check_query_identifiers.yaml | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/check_query_identifiers.yaml b/.github/workflows/check_query_identifiers.yaml
index 99300015d..45b8d7e0a 100644
--- a/.github/workflows/check_query_identifiers.yaml
+++ b/.github/workflows/check_query_identifiers.yaml
@@ -3,7 +3,9 @@ on:
   push:
     branches: [main]
   pull_request:
-    branches: [main]
+    branches:
+      - main
+    types: [opened, reopened, synchronize]
 
 jobs:
   format_check:

From 408abc932b75aab0ac7830f284fd3455472267a5 Mon Sep 17 00:00:00 2001
From: Andrew Tavis McAllister <andrew.t.mcallister@gmail.com>
Date: Tue, 15 Oct 2024 22:26:21 +0200
Subject: [PATCH 31/36] Deactivate workflow so it can be brought into other PRs

---
 .../workflows/check_query_identifiers.yaml    | 36 +++++++++----------
 1 file changed, 18 insertions(+), 18 deletions(-)

diff --git a/.github/workflows/check_query_identifiers.yaml b/.github/workflows/check_query_identifiers.yaml
index 45b8d7e0a..739c5fec3 100644
--- a/.github/workflows/check_query_identifiers.yaml
+++ b/.github/workflows/check_query_identifiers.yaml
@@ -25,21 +25,21 @@ jobs:
       - name: Checkout
         uses: actions/checkout@v3
 
-      - name: Set up Python ${{ matrix.python-version }}
-        uses: actions/setup-python@v4
-        with:
-          python-version: ${{ matrix.python-version }}
-
-      - name: Install dependencies
-        run: |
-          python -m pip install --upgrade uv
-          uv venv
-          uv pip install -r requirements.txt
-
-      - name: Activate virtualenv
-        run: |
-          . .venv/bin/activate
-          echo PATH=$PATH >> $GITHUB_ENV
-
-      - name: Run Python script
-        run: python src/scribe_data/check/check_query_identifiers.py
+      # - name: Set up Python ${{ matrix.python-version }}
+      #   uses: actions/setup-python@v4
+      #   with:
+      #     python-version: ${{ matrix.python-version }}
+
+      # - name: Install dependencies
+      #   run: |
+      #     python -m pip install --upgrade uv
+      #     uv venv
+      #     uv pip install -r requirements.txt
+
+      # - name: Activate virtualenv
+      #   run: |
+      #     . .venv/bin/activate
+      #     echo PATH=$PATH >> $GITHUB_ENV
+
+      # - name: Run Python script
+      #   run: python src/scribe_data/check/check_query_identifiers.py

From bf02ac8595b56d95c39394110993ee22089ebc38 Mon Sep 17 00:00:00 2001
From: Andrew Tavis McAllister <andrew.t.mcallister@gmail.com>
Date: Tue, 15 Oct 2024 22:27:39 +0200
Subject: [PATCH 32/36] Remove yaml from workflow name

---
 .github/workflows/check_query_identifiers.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/check_query_identifiers.yaml b/.github/workflows/check_query_identifiers.yaml
index 739c5fec3..780da47da 100644
--- a/.github/workflows/check_query_identifiers.yaml
+++ b/.github/workflows/check_query_identifiers.yaml
@@ -1,4 +1,4 @@
-name: check_query_identifiers.yaml
+name: check_query_identifiers
 on:
   push:
     branches: [main]

From 08f6ed117b142032209fafb5f55e91c82086ca75 Mon Sep 17 00:00:00 2001
From: Andrew Tavis McAllister <andrew.t.mcallister@gmail.com>
Date: Wed, 16 Oct 2024 12:55:16 +0200
Subject: [PATCH 33/36] Update unicode docs

---
 src/scribe_data/cli/get.py                         |  2 +-
 .../unicode/{UNICODE.md => UNICODE_INSTALLTION.md} | 14 +++++++++++++-
 2 files changed, 14 insertions(+), 2 deletions(-)
 rename src/scribe_data/unicode/{UNICODE.md => UNICODE_INSTALLTION.md} (52%)

diff --git a/src/scribe_data/cli/get.py b/src/scribe_data/cli/get.py
index c3d5eecc9..3cbea6980 100644
--- a/src/scribe_data/cli/get.py
+++ b/src/scribe_data/cli/get.py
@@ -154,5 +154,5 @@ def get_data(
             "\nThe Scribe-Data emoji functionality is powered by PyICU, which is currently not installed."
         )
         print(
-            "Please check the installation steps at https://gitlab.pyicu.org/main/pyicu for more information.\n"
+            "Please check the installation guide at https://github.com/scribe-org/Scribe-Data/blob/main/src/scribe_data/unicode/UNICODE_INSTALLTION.md for more information.\n"
         )
diff --git a/src/scribe_data/unicode/UNICODE.md b/src/scribe_data/unicode/UNICODE_INSTALLTION.md
similarity index 52%
rename from src/scribe_data/unicode/UNICODE.md
rename to src/scribe_data/unicode/UNICODE_INSTALLTION.md
index 2d15a7a7d..e8f493163 100644
--- a/src/scribe_data/unicode/UNICODE.md
+++ b/src/scribe_data/unicode/UNICODE_INSTALLTION.md
@@ -1,5 +1,17 @@
-# scribe_data.unicode
+# Scribe-Data Unicode Functionality Installation
 
 The Scribe-Data Unicode process is powered by [cldr-json](https://github.com/unicode-org/cldr-json) data from the [Unicode Consortium](https://home.unicode.org/) and [PyICU](https://gitlab.pyicu.org/main/pyicu), a Python extension that wraps the Unicode Consortium's [International Components for Unicode (ICU)](https://github.com/unicode-org/icu) C++ project.
 
 Please see the [installation guide for PyICU](https://gitlab.pyicu.org/main/pyicu#installing-pyicu) as the extension must be linked to ICU on your machine to work properly.
+
+Note that some of the commands may be incorrect. On macOS you may need to do the following:
+
+```bash
+# Instead of:
+export PATH="$(brew --prefix)/opt/icu4c/bin:$(brew --prefix)/opt/icu4c/sbin:$PATH"
+export PKG_CONFIG_PATH="$PKG_CONFIG_PATH:$(brew --prefix)/opt/icu4c/lib/pkgconfig"
+
+# Run:
+echo "/opt/homebrew/opt/icu4c/bin:/opt/homebrew/opt/icu4c/sbin:$PATH"
+echo "PKG_CONFIG_PATH=$PKG_CONFIG_PATH:/opt/homebrew/opt/icu4c/lib/pkgconfig"
+```

From 5fba72fbb2bec2247f7da8ce6a8d869cf64dad7e Mon Sep 17 00:00:00 2001
From: Andrew Tavis McAllister <andrew.t.mcallister@gmail.com>
Date: Wed, 16 Oct 2024 13:53:36 +0200
Subject: [PATCH 34/36] Update Sphinx RTD theme for docs

---
 docs/source/conf.py | 4 ++--
 requirements.txt    | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/docs/source/conf.py b/docs/source/conf.py
index 8d6e22d30..0c9e706d5 100644
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@@ -63,7 +63,7 @@
     "pytest-cov",
     "ruff",
     "SPARQLWrapper",
-    "tqdm"
+    "tqdm",
 ]
 
 # Add any paths that contain templates here, relative to this directory.
@@ -91,7 +91,7 @@
 
 html_theme = "sphinx_rtd_theme"
 
-html_theme_path = [sphinx_rtd_theme.get_html_theme_path()]
+html_theme_path = [sphinx_rtd_theme]
 
 # Theme options are theme-specific and customize the look and feel of a theme
 # further.  For a list of options available for each theme, see the
diff --git a/requirements.txt b/requirements.txt
index 16c262084..abbd5e443 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -16,5 +16,5 @@ regex>=2023.3.23
 rich>=10.0.0
 ruff>=0.3.3
 SPARQLWrapper>=2.0.0
-sphinx-rtd-theme>=2.0.0
+sphinx-rtd-theme>=3.0.0
 tqdm==4.66.4

From d37872c109464d3e7e666d000f11eadebab88d43 Mon Sep 17 00:00:00 2001
From: Akindele Michael <akindelemichael65@gmail.com>
Date: Wed, 16 Oct 2024 13:22:36 +0100
Subject: [PATCH 35/36] Cleanup query validation logic: update
 data_type_pattern and clean up print statements

---
 .../check/check_query_identifiers.py          | 86 +++++++++++++------
 1 file changed, 61 insertions(+), 25 deletions(-)

diff --git a/src/scribe_data/check/check_query_identifiers.py b/src/scribe_data/check/check_query_identifiers.py
index b379a5c86..52d9fe158 100644
--- a/src/scribe_data/check/check_query_identifiers.py
+++ b/src/scribe_data/check/check_query_identifiers.py
@@ -10,22 +10,41 @@
 
 def extract_qid_from_sparql(file_path: Path, pattern: str) -> str:
     """
-    Extract the QID based on the pattern provided (either language or data type).
+    Extracts the QID from a SPARQL query file based on the provided pattern.
+
+    Parameters
+    ----------
+    file_path : Path
+        The path to the SPARQL query file from which to extract the QID.
+    pattern : str
+        The regex pattern used to match the QID (either for language or data type).
+
+    Returns
+    -------
+    str
+        The extracted QID if found, otherwise None.
     """
     try:
         with open(file_path, "r", encoding="utf-8") as file:
             content = file.read()
             match = re.search(pattern, content)
             if match:
-                return match.group(0).replace("wd:", "")
+                return match.group(0).split("wd:")[1]
     except Exception as e:
         print(f"Error reading {file_path}: {e}")
     return None
 
 
 def check_queries():
+    """
+    Validates SPARQL queries in the specified directory to check for correct language
+    and data type QIDs.
+
+    This function scans all SPARQL query files in the LANGUAGE_DATA_EXTRACTION_DIR
+    and prints out any files with incorrect QIDs for both languages and data types.
+    """
     language_pattern = r"\?lexeme dct:language wd:Q\d+"
-    data_type_pattern = r"wikibase:lexicalCategory wd:Q\d+"
+    data_type_pattern = r"wikibase:lexicalCategory\s+wd:Q\d+"
     incorrect_languages = []
     incorrect_data_types = []
 
@@ -41,17 +60,34 @@ def check_queries():
             incorrect_data_types.append(query_file)
 
     if incorrect_languages:
-        print("Queries with incorrect languages QIDs are:")
+        print("Incorrect Language QIDs found in the following files:")
         for file in incorrect_languages:
             print(f"- {file}")
+    print("\n----------------------------------------------------------------\n")
 
     if incorrect_data_types:
-        print("Queries with incorrect data type QIDs are:")
+        print("Incorrect Data Type QIDs found in the following files:")
         for file in incorrect_data_types:
             print(f"- {file}")
+    print("\n----------------------------------------------------------------\n")
 
 
-def is_valid_language(query_file, lang_qid):
+def is_valid_language(query_file: Path, lang_qid: str) -> bool:
+    """
+    Validates the language QID against the expected QID for the directory.
+
+    Parameters
+    ----------
+    query_file : Path
+        The path to the SPARQL query file being validated.
+    lang_qid : str
+        The QID of the language extracted from the SPARQL query.
+
+    Returns
+    -------
+    bool
+        True if the language QID is valid, otherwise False.
+    """
     lang_directory_name = query_file.parent.parent.name.lower()
     languages = language_metadata.get(
         "languages"
@@ -61,39 +97,39 @@ def is_valid_language(query_file, lang_qid):
     )
 
     if not language_entry:
-        print(
-            f"Warning: Language '{lang_directory_name}' not found in language_metadata.json."
-        )
         return False
 
     expected_language_qid = language_entry["qid"]
-    print("Expected language QID:", expected_language_qid)
 
     if lang_qid != expected_language_qid:
-        print(
-            f"Incorrect language QID in {lang_directory_name}. "
-            f"Found: {lang_qid}, Expected: {expected_language_qid}"
-        )
         return False
     return True
 
 
-def is_valid_data_type(query_file, data_type_qid):
+def is_valid_data_type(query_file: Path, data_type_qid: str) -> bool:
+    """
+    Validates the data type QID against the expected QID for the directory.
+
+    Parameters
+    ----------
+    query_file : Path
+        The path to the SPARQL query file being validated.
+    data_type_qid : str
+        The QID of the data type extracted from the SPARQL query.
+
+    Returns
+    -------
+    bool
+        True if the data type QID is valid, otherwise False.
+    """
     directory_name = query_file.parent.name  # e.g., "nouns" or "verbs"
     expected_data_type_qid = data_type_metadata.get(directory_name)
 
     if data_type_qid != expected_data_type_qid:
-        print(
-            f"Warning: Incorrect data type QID in {query_file}. Found: {data_type_qid}, Expected: {expected_data_type_qid}"
-        )
         return False
     return True
 
 
-# Examples:
-
-# file_path = Path("French/verbs/query_verbs.sparql")
-# print(is_valid_data_type(file_path, "QW24907")) # check for data type
-# print(is_valid_language(file_path, "Q150")) # check for if valid language
-
-check_queries()
+# Run the check_queries function
+# MARK: TODO: Remove Call
+# check_queries()

From 5e8626534a36b0a36598f37930efb4828b3b8c4f Mon Sep 17 00:00:00 2001
From: Andrew Tavis McAllister <andrew.t.mcallister@gmail.com>
Date: Thu, 17 Oct 2024 00:56:24 +0200
Subject: [PATCH 36/36] Minor edits to script formatting

---
 .../check/check_query_identifiers.py          | 29 ++++++++++---------
 1 file changed, 16 insertions(+), 13 deletions(-)

diff --git a/src/scribe_data/check/check_query_identifiers.py b/src/scribe_data/check/check_query_identifiers.py
index 52d9fe158..5f8276e4d 100644
--- a/src/scribe_data/check/check_query_identifiers.py
+++ b/src/scribe_data/check/check_query_identifiers.py
@@ -3,8 +3,8 @@
 
 from scribe_data.cli.cli_utils import (
     LANGUAGE_DATA_EXTRACTION_DIR,
-    language_metadata,
     data_type_metadata,
+    language_metadata,
 )
 
 
@@ -14,24 +14,26 @@ def extract_qid_from_sparql(file_path: Path, pattern: str) -> str:
 
     Parameters
     ----------
-    file_path : Path
-        The path to the SPARQL query file from which to extract the QID.
-    pattern : str
-        The regex pattern used to match the QID (either for language or data type).
+        file_path : Path
+            The path to the SPARQL query file from which to extract the QID.
+
+        pattern : str
+            The regex pattern used to match the QID (either for language or data type).
 
     Returns
     -------
-    str
-        The extracted QID if found, otherwise None.
+        str
+            The extracted QID if found, otherwise None.
     """
     try:
         with open(file_path, "r", encoding="utf-8") as file:
             content = file.read()
-            match = re.search(pattern, content)
-            if match:
-                return match.group(0).split("wd:")[1]
+            if match := re.search(pattern, content):
+                return match[0].split("wd:")[1]
+
     except Exception as e:
         print(f"Error reading {file_path}: {e}")
+
     return None
 
 
@@ -63,12 +65,14 @@ def check_queries():
         print("Incorrect Language QIDs found in the following files:")
         for file in incorrect_languages:
             print(f"- {file}")
+
     print("\n----------------------------------------------------------------\n")
 
     if incorrect_data_types:
         print("Incorrect Data Type QIDs found in the following files:")
         for file in incorrect_data_types:
             print(f"- {file}")
+
     print("\n----------------------------------------------------------------\n")
 
 
@@ -103,6 +107,7 @@ def is_valid_language(query_file: Path, lang_qid: str) -> bool:
 
     if lang_qid != expected_language_qid:
         return False
+
     return True
 
 
@@ -125,9 +130,7 @@ def is_valid_data_type(query_file: Path, data_type_qid: str) -> bool:
     directory_name = query_file.parent.name  # e.g., "nouns" or "verbs"
     expected_data_type_qid = data_type_metadata.get(directory_name)
 
-    if data_type_qid != expected_data_type_qid:
-        return False
-    return True
+    return data_type_qid == expected_data_type_qid
 
 
 # Run the check_queries function