From ea9e2a57295f23a92432cb5656871feb770ccb35 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?P=C3=A9ter=20Kir=C3=A1ly?= Date: Tue, 7 Nov 2023 13:20:31 +0100 Subject: [PATCH] PICA: Extend classification/subject headings schemes from config file #190 --- .../marc/analysis/ClassificationAnalyzer.java | 43 +++++++++++-------- .../marc/dao/record/PicaRecord.java | 8 ++++ .../subject/ClassificationSchemes.java | 1 + 3 files changed, 33 insertions(+), 19 deletions(-) diff --git a/src/main/java/de/gwdg/metadataqa/marc/analysis/ClassificationAnalyzer.java b/src/main/java/de/gwdg/metadataqa/marc/analysis/ClassificationAnalyzer.java index a9cc2c749..1a5d7f3a2 100644 --- a/src/main/java/de/gwdg/metadataqa/marc/analysis/ClassificationAnalyzer.java +++ b/src/main/java/de/gwdg/metadataqa/marc/analysis/ClassificationAnalyzer.java @@ -36,7 +36,7 @@ public class ClassificationAnalyzer { ClassificationSchemes.getInstance(); private static final Pattern NUMERIC = Pattern.compile("^\\d"); public static final String DEWEY_DECIMAL_CLASSIFICATION = "Dewey Decimal Classification"; - private static PicaVocabularyManager manager = null; + private static PicaVocabularyManager picaVocabularyManager = null; private final ClassificationStatistics statistics; private ClassificationParameters parameters = null; @@ -130,8 +130,8 @@ public class ClassificationAnalyzer { public ClassificationAnalyzer(BibliographicRecord marcRecord, ClassificationStatistics statistics) { this.marcRecord = marcRecord; this.statistics = statistics; - if (marcRecord.getSchemaType().equals(SchemaType.PICA) && manager == null) { - manager = PicaVocabularyManager.getInstance(); + if (marcRecord.getSchemaType().equals(SchemaType.PICA) && picaVocabularyManager == null) { + picaVocabularyManager = PicaVocabularyManager.getInstance(); } } @@ -182,24 +182,29 @@ private int processFieldsWithScheme(int total, List fieldsWithS private int processFieldsWithSchemePica(int total, List fieldsWithScheme) { int count = total; - // for (VocabularyEntry entry : manager.getAll()) { - for (FieldWithScheme entry : fieldsWithScheme) { - /* - String tag = entry.getPica(); - String schema = entry.getLabel(); - String voc = entry.getVoc(); - */ - String tag = entry.getTag(); - String schema = entry.getSchemaName(); - String voc = tag; - try { - voc = classificationSchemes.resolve(schema); - } catch (IllegalArgumentException e) { - + boolean processFromTSV = true; + if (processFromTSV) { + for (FieldWithScheme entry : fieldsWithScheme) { + String tag = entry.getTag(); + String schema = entry.getSchemaName(); + String voc = tag; + try { + voc = classificationSchemes.resolve(schema); + } catch (IllegalArgumentException e) { + } + count += processPicaSubject(tag, voc, schema); } - if (!marcRecord.hasDatafield(tag)) - continue; + } else { + for (VocabularyEntry entry : picaVocabularyManager.getAll()) { + count += processPicaSubject(entry.getPica(), entry.getVoc(), entry.getLabel()); + } + } + return count; + } + private int processPicaSubject(String tag, String voc, String schema) { + int count = 0; + if (marcRecord.hasDatafield(tag)) { List fields = marcRecord.getDatafield(tag); List schemas = new ArrayList<>(); for (DataField field : fields) { diff --git a/src/main/java/de/gwdg/metadataqa/marc/dao/record/PicaRecord.java b/src/main/java/de/gwdg/metadataqa/marc/dao/record/PicaRecord.java index ca88ae830..93cb0dcbb 100644 --- a/src/main/java/de/gwdg/metadataqa/marc/dao/record/PicaRecord.java +++ b/src/main/java/de/gwdg/metadataqa/marc/dao/record/PicaRecord.java @@ -149,7 +149,15 @@ private static void initializeAuthorityTags() { subjectTagIndex = Utils.listToMap(subjectTags); skippableSubjectSubfields = new HashMap<>(); skippableSubjectSubfields.put("022A", Utils.listToMap(Arrays.asList("9", "V", "7", "3", "w"))); + skippableSubjectSubfields.put("044H", Utils.listToMap(Arrays.asList("A"))); // A = Quelle + skippableSubjectSubfields.put("044S", Utils.listToMap(Arrays.asList("9", "A", "V", "7", "3", "w"))); + skippableSubjectSubfields.put("045F", Utils.listToMap(Arrays.asList("A"))); + skippableSubjectSubfields.put("045G", Utils.listToMap(Arrays.asList("A"))); + skippableSubjectSubfields.put("045X", Utils.listToMap(Arrays.asList("A"))); + skippableSubjectSubfields.put("045Y", Utils.listToMap(Arrays.asList("A"))); + skippableSubjectSubfields.put("045N", Utils.listToMap(Arrays.asList("9", "V", "7", "3", "w"))); skippableSubjectSubfields.put("045R", Utils.listToMap(Arrays.asList("9", "V", "7", "3", "w"))); + skippableSubjectSubfields.put("045T", Utils.listToMap(Arrays.asList("9", "V", "7", "3", "w"))); authorityTagsMap = new EnumMap<>(AuthorityCategory.class); authorityTagsMap.put(AuthorityCategory.TITLES, List.of("022A", "022A")); diff --git a/src/main/java/de/gwdg/metadataqa/marc/definition/general/indexer/subject/ClassificationSchemes.java b/src/main/java/de/gwdg/metadataqa/marc/definition/general/indexer/subject/ClassificationSchemes.java index 8538ca260..265ab7dc4 100644 --- a/src/main/java/de/gwdg/metadataqa/marc/definition/general/indexer/subject/ClassificationSchemes.java +++ b/src/main/java/de/gwdg/metadataqa/marc/definition/general/indexer/subject/ClassificationSchemes.java @@ -68,6 +68,7 @@ private void initialize() { schemes.put("LoC Subject Headings", "lcsh0"); schemes.put("Regensburger Verbundklassifikation (RVK)", "rvk"); schemes.put("Medical Subject Headings (MeSH)", "mesh"); + schemes.put("Klassifikation der National Library of Medicine (NLM)", "mesh"); } public String resolve(String key) {