-
Notifications
You must be signed in to change notification settings - Fork 0
/
convert_data_jurivoc.py
executable file
·103 lines (86 loc) · 3.62 KB
/
convert_data_jurivoc.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
import os
from JurivocData import dataset
from convert_data_graph import convert_graph, update_graph
import argparse
from argparse import ArgumentError
import pathlib
import pandas as pd
from rdflib import Graph
def build_parser():
    """Build the command-line parser for the Jurivoc to SKOS conversion.

    Returns:
        argparse.ArgumentParser: parser whose namespace exposes ``data``,
        ``output``, ``logs``, ``previousVersion`` and ``noComplexSubjects``.
    """
    parser = argparse.ArgumentParser(
        prog='convert_data_jurivoc',
        description='Converts data Jurivoc in Skos',
        allow_abbrev=False
    )
    parser.add_argument(
        '-d', '--data',
        help='Input directory containing the Jurivoc txt files',
        required=True, type=pathlib.Path, dest='data'
    )
    parser.add_argument(
        '-o', '--output',
        help='Output directory',
        required=True, dest='output'
    )
    # The log directory is used unconditionally by main() (CSV dumps, the
    # intermediate graph, and both graph classes receive it), so a missing
    # value used to end in a TypeError. Require it so argparse reports the
    # problem as a normal usage error instead.
    parser.add_argument(
        '-l', '--log',
        help='Log Directory where the raw dataframes resulting from file parsing will be logged',
        required=True, dest='logs'
    )
    parser.add_argument(
        '-g', '--previousVersion',
        help='Directory where the previous version of jurivoc will be read to fetch the previous URIs',
        type=pathlib.Path, dest='previousVersion'
    )
    parser.add_argument(
        '-n', '--noComplexSubjects',
        help="Do not generate the `madsrdf:ComplexSubject` entities",
        required=False, action='store_true', dest='noComplexSubjects'
    )
    return parser


def ensure_directory(path):
    """Create ``path`` (including missing parents) when it does not exist.

    Args:
        path: directory path as a string or path-like object.

    Returns:
        bool: True when the directory was created by this call, False when
        the path already existed.
    """
    if os.path.exists(path):
        return False
    # exist_ok guards against another process creating the directory between
    # the existence check above and this call.
    os.makedirs(path, exist_ok=True)
    print("The {} directory is created.".format(path))
    return True


def main(argv=None):
    """Run the conversion: parse the input files, build and save the graph.

    Args:
        argv: optional list of command-line arguments; when None, argparse
            reads them from ``sys.argv``.
    """
    # parse_args() reports invalid input through parser.error(), which prints
    # the usage text and exits, so no ArgumentError handling is needed here.
    args = build_parser().parse_args(argv)

    print('---------------------------------------------')
    print('| Argument | Value |')
    print('---------------------------------------------')
    for name, value in vars(args).items():
        print(f'{name.upper()} | {value}')
    print('---------------------------------------------\n')

    print("Directory Source: {}".format(args.data))
    print("Directory output: {}".format(args.output))

    ensure_directory(args.output)
    ensure_directory(args.logs)

    # Step 1: read the input files into a list of entries whose first item
    # is a file name and whose second item is a DataFrame.
    print("Step 1. Parsing input files...")
    ds = dataset(args.data).read_file()

    # Step 1.1: dump every parsed DataFrame as a pipe-separated CSV file in
    # the log directory.
    print("Step 1.1 Generate log output files of dataframes...")
    for entry in ds:
        csv_path = os.path.join(args.logs, entry[0] + '.csv')
        entry[1].to_csv(csv_path, sep="|", index=False)

    # Step 2: build the graph from the parsed data.
    print("Step 2. Generate Jurivoc SKOS graph...")
    graph = convert_graph(ds, args.logs, args.noComplexSubjects).graph_process()
    if len(graph) == 0:
        # Nothing was produced, so there is nothing to save or post-process.
        return

    # Keep the intermediate graph next to the other log artefacts.
    intermediate_path = os.path.join(args.logs, 'jurivoc_with_label_uris.n3')
    graph.serialize(format="n3", destination=intermediate_path)

    # An empty string is passed when no previous version directory was given.
    previous_version = args.previousVersion or ''
    updater = update_graph(graph, previous_version, args.logs, args.noComplexSubjects)
    final_graph = updater.update_uri_concepts()
    if len(final_graph) > 0:
        result_path = os.path.join(args.output, 'jurivoc.n3')
        final_graph.serialize(format="n3", destination=result_path)


if __name__ == '__main__':
    main()