-
Notifications
You must be signed in to change notification settings - Fork 0
/
parseDataset_macroIngredients.py
135 lines (119 loc) · 4.7 KB
/
parseDataset_macroIngredients.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
# importing libraries
import csv
# importing data
def _read_rows(csv_path):
    """Return all rows of the CSV at *csv_path* as a list of lists of strings.

    The file is parsed with ';' as the field delimiter and '|' as the quote
    character, and is decoded as UTF-8.
    """
    with open(csv_path, newline='', encoding='utf-8') as handle:
        return list(csv.reader(handle, delimiter=';', quotechar='|'))


# Raw rows (header row included) of the two input files.
listOfData = _read_rows('./dataset_100/NewDataset.csv')
listOfIngredientsData = _read_rows('./dataset_100/ingredients.csv')
# Declared here but never filled by this script.
listOfIngredientsClassesData = []
# Filled later with one [ingredient ID, class, ...] entry per ingredient.
ingredientsClassesAssociation = []

# Only the header (first row, when there is one) is inspected to locate the
# ID columns; every following row is data.
header_cells = listOfData[0] if listOfData else []

# Column indexes, within columns 8..61, whose header reads "Ingredient ID".
ingredientsIdPositions = [
    column
    for column, title in enumerate(header_cells)
    if 8 <= column <= 61 and title == "Ingredient ID"
]

# Column indexes, within columns 62..76, whose header reads "ID".
preparationsIdPositions = [
    column
    for column, title in enumerate(header_cells)
    if 62 <= column <= 76 and title == "ID"
]
# Collect every distinct, non-empty ID that appears in the ingredient-ID and
# preparation-ID columns of the data rows, then order each list numerically.
ingredient_columns = set(ingredientsIdPositions)
preparation_columns = set(preparationsIdPositions)

ingredientsIds = []
preparationsIds = []
for rowIndex, row in enumerate(listOfData):
    if rowIndex == 0:
        # Row 0 is the header, not data.
        continue
    for index, element in enumerate(row):
        # Compare by value: the original `element is not ''` tested object
        # identity against a literal, which is implementation-dependent and
        # triggers a SyntaxWarning on Python 3.8+.
        if element == '':
            continue
        if index in ingredient_columns and element not in ingredientsIds:
            ingredientsIds.append(element)
        if index in preparation_columns and element not in preparationsIds:
            preparationsIds.append(element)

# Original author's note for the bundled dataset:
# total number of ingredients (max granularity): 135
# total number of preparation: 8
# IDs are kept as strings but sorted by their integer value.
ingredientsIds.sort(key=int)
preparationsIds.sort(key=int)
# Pair each ingredient ID with the class value(s) recorded for it in
# ingredients.csv. Every entry has the form [ingredient ID as int, class, ...]:
# one class (column 3, as int) is added for each data row whose column 1
# equals the ID string. The header row of ingredients.csv is skipped.
ingredient_records = listOfIngredientsData[1:]
for ingredient_key in ingredientsIds:
    association = [int(ingredient_key)]
    association.extend(
        int(record[3])
        for record in ingredient_records
        if record[1] == ingredient_key
    )
    ingredientsClassesAssociation.append(association)
# Build one output record per data row of NewDataset.csv.
#
# Each record is the list:
#   [ID, name, category, cost, difficulty, preparation time,
#    per-class ingredient totals, per-preparation values, link]
INGREDIENT_CLASS_SLOTS = 36  # length of the per-class ingredient vector
MIN_PREPARATION_SLOTS = 8    # the preparation vector has at least 8 slots
MIN_ROW_LENGTH = 78          # a row is emitted only if it reaches column 77

# Lookups derived from the IDs actually collected from the data. The original
# scanned range(0, 135) / range(0, 8), which raised IndexError when fewer IDs
# existed and silently ignored any IDs beyond those bounds.
known_ingredient_ids = set(ingredientsIds)
preparation_slot_by_id = {
    preparation_id: slot for slot, preparation_id in enumerate(preparationsIds)
}
preparation_slot_count = max(MIN_PREPARATION_SLOTS, len(preparationsIds))

finalDataset = []
for row in listOfData[1:]:  # row 0 is the header
    # The original loop only emitted a row once it reached column index 77,
    # so shorter rows never produced a record; skip them before parsing.
    if len(row) < MIN_ROW_LENGTH:
        continue

    name_of_food = row[0]
    ID_of_food = int(row[1])
    link_of_food = row[2]
    # Column 3 is not used.
    category_of_food = int(row[4])
    cost_of_food = int(row[5])
    difficulty_of_food = int(row[6])
    preparation_time_of_food = int(row[7])

    list_of_ingredients_of_food = [0] * INGREDIENT_CLASS_SLOTS
    list_of_preparations_of_food = [0] * preparation_slot_count

    # Each ingredient-ID column is followed by a column holding an integer
    # value; that value is added to the slot of the ingredient's class.
    for position in ingredientsIdPositions:
        ingredient_id = row[position]
        if ingredient_id == '' or ingredient_id not in known_ingredient_ids:
            continue
        ingredient_key = int(ingredient_id)
        amount = int(row[position + 1])
        for association in ingredientsClassesAssociation:
            if association[0] == ingredient_key:
                # association[1] is the first class recorded for the ID;
                # classes are 1-based, slots are 0-based.
                list_of_ingredients_of_food[association[1] - 1] += amount

    # Each preparation-ID column is followed by a column holding an integer
    # value; it is stored at the slot given by the ID's rank in the sorted
    # preparationsIds list.
    for position in preparationsIdPositions:
        preparation_id = row[position]
        if preparation_id == '':
            continue
        slot = preparation_slot_by_id.get(preparation_id)
        if slot is not None:
            list_of_preparations_of_food[slot] = int(row[position + 1])

    # NOTE: the two vectors are stored as nested lists inside the record,
    # not flattened into individual columns.
    finalDataset.append([
        ID_of_food,
        name_of_food,
        category_of_food,
        cost_of_food,
        difficulty_of_food,
        preparation_time_of_food,
        list_of_ingredients_of_food,
        list_of_preparations_of_food,
        link_of_food,
    ])
# Dump every assembled record to standard output, one record per line.
for record in finalDataset:
    print(record)