diff --git a/BigMart Mini Project (Ver2) (1).ipynb b/BigMart Mini Project (Ver2) (1).ipynb new file mode 100644 index 0000000..f83d554 --- /dev/null +++ b/BigMart Mini Project (Ver2) (1).ipynb @@ -0,0 +1,2344 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 2, + "id": "0b76993e", + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import pandas as pd\n", + "import matplotlib.pyplot as plt\n", + "import seaborn as sns\n", + "from sklearn.preprocessing import LabelEncoder\n", + "from sklearn.model_selection import train_test_split\n", + "from xgboost import XGBRegressor\n", + "from sklearn import metrics" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "9ca5284a", + "metadata": {}, + "outputs": [], + "source": [ + "train_data=pd.read_csv(\"Train.csv\")\n", + "test_data=pd.read_csv('Test.csv')" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "278ccd00", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " | Item_Identifier | \n", + "Item_Weight | \n", + "Item_Fat_Content | \n", + "Item_Visibility | \n", + "Item_Type | \n", + "Item_MRP | \n", + "Outlet_Identifier | \n", + "Outlet_Establishment_Year | \n", + "Outlet_Size | \n", + "Outlet_Location_Type | \n", + "Outlet_Type | \n", + "Item_Outlet_Sales | \n", + "
---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", + "FDA15 | \n", + "9.30 | \n", + "Low Fat | \n", + "0.016047 | \n", + "Dairy | \n", + "249.8092 | \n", + "OUT049 | \n", + "1999 | \n", + "Medium | \n", + "Tier 1 | \n", + "Supermarket Type1 | \n", + "3735.1380 | \n", + "
1 | \n", + "DRC01 | \n", + "5.92 | \n", + "Regular | \n", + "0.019278 | \n", + "Soft Drinks | \n", + "48.2692 | \n", + "OUT018 | \n", + "2009 | \n", + "Medium | \n", + "Tier 3 | \n", + "Supermarket Type2 | \n", + "443.4228 | \n", + "
2 | \n", + "FDN15 | \n", + "17.50 | \n", + "Low Fat | \n", + "0.016760 | \n", + "Meat | \n", + "141.6180 | \n", + "OUT049 | \n", + "1999 | \n", + "Medium | \n", + "Tier 1 | \n", + "Supermarket Type1 | \n", + "2097.2700 | \n", + "
3 | \n", + "FDX07 | \n", + "19.20 | \n", + "Regular | \n", + "0.000000 | \n", + "Fruits and Vegetables | \n", + "182.0950 | \n", + "OUT010 | \n", + "1998 | \n", + "NaN | \n", + "Tier 3 | \n", + "Grocery Store | \n", + "732.3800 | \n", + "
4 | \n", + "NCD19 | \n", + "8.93 | \n", + "Low Fat | \n", + "0.000000 | \n", + "Household | \n", + "53.8614 | \n", + "OUT013 | \n", + "1987 | \n", + "High | \n", + "Tier 3 | \n", + "Supermarket Type1 | \n", + "994.7052 | \n", + "
\n", + " | Item_Identifier | \n", + "Item_Weight | \n", + "Item_Fat_Content | \n", + "Item_Visibility | \n", + "Item_Type | \n", + "Item_MRP | \n", + "Outlet_Identifier | \n", + "Outlet_Establishment_Year | \n", + "Outlet_Size | \n", + "Outlet_Location_Type | \n", + "Outlet_Type | \n", + "
---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", + "FDW58 | \n", + "20.750 | \n", + "Low Fat | \n", + "0.007565 | \n", + "Snack Foods | \n", + "107.8622 | \n", + "OUT049 | \n", + "1999 | \n", + "Medium | \n", + "Tier 1 | \n", + "Supermarket Type1 | \n", + "
1 | \n", + "FDW14 | \n", + "8.300 | \n", + "reg | \n", + "0.038428 | \n", + "Dairy | \n", + "87.3198 | \n", + "OUT017 | \n", + "2007 | \n", + "NaN | \n", + "Tier 2 | \n", + "Supermarket Type1 | \n", + "
2 | \n", + "NCN55 | \n", + "14.600 | \n", + "Low Fat | \n", + "0.099575 | \n", + "Others | \n", + "241.7538 | \n", + "OUT010 | \n", + "1998 | \n", + "NaN | \n", + "Tier 3 | \n", + "Grocery Store | \n", + "
3 | \n", + "FDQ58 | \n", + "7.315 | \n", + "Low Fat | \n", + "0.015388 | \n", + "Snack Foods | \n", + "155.0340 | \n", + "OUT017 | \n", + "2007 | \n", + "NaN | \n", + "Tier 2 | \n", + "Supermarket Type1 | \n", + "
4 | \n", + "FDY38 | \n", + "NaN | \n", + "Regular | \n", + "0.118599 | \n", + "Dairy | \n", + "234.2300 | \n", + "OUT027 | \n", + "1985 | \n", + "Medium | \n", + "Tier 3 | \n", + "Supermarket Type3 | \n", + "
\n", + " | Item_Identifier | \n", + "Item_Weight | \n", + "Item_Fat_Content | \n", + "Item_Visibility | \n", + "Item_Type | \n", + "Item_MRP | \n", + "Outlet_Identifier | \n", + "Outlet_Establishment_Year | \n", + "Outlet_Size | \n", + "Outlet_Location_Type | \n", + "Outlet_Type | \n", + "Item_Outlet_Sales | \n", + "source | \n", + "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", + "FDA15 | \n", + "9.30 | \n", + "Low Fat | \n", + "0.016047 | \n", + "Dairy | \n", + "249.8092 | \n", + "OUT049 | \n", + "1999 | \n", + "Medium | \n", + "Tier 1 | \n", + "Supermarket Type1 | \n", + "3735.1380 | \n", + "train | \n", + "
1 | \n", + "DRC01 | \n", + "5.92 | \n", + "Regular | \n", + "0.019278 | \n", + "Soft Drinks | \n", + "48.2692 | \n", + "OUT018 | \n", + "2009 | \n", + "Medium | \n", + "Tier 3 | \n", + "Supermarket Type2 | \n", + "443.4228 | \n", + "train | \n", + "
2 | \n", + "FDN15 | \n", + "17.50 | \n", + "Low Fat | \n", + "0.016760 | \n", + "Meat | \n", + "141.6180 | \n", + "OUT049 | \n", + "1999 | \n", + "Medium | \n", + "Tier 1 | \n", + "Supermarket Type1 | \n", + "2097.2700 | \n", + "train | \n", + "
3 | \n", + "FDX07 | \n", + "19.20 | \n", + "Regular | \n", + "0.000000 | \n", + "Fruits and Vegetables | \n", + "182.0950 | \n", + "OUT010 | \n", + "1998 | \n", + "NaN | \n", + "Tier 3 | \n", + "Grocery Store | \n", + "732.3800 | \n", + "train | \n", + "
4 | \n", + "NCD19 | \n", + "8.93 | \n", + "Low Fat | \n", + "0.000000 | \n", + "Household | \n", + "53.8614 | \n", + "OUT013 | \n", + "1987 | \n", + "High | \n", + "Tier 3 | \n", + "Supermarket Type1 | \n", + "994.7052 | \n", + "train | \n", + "
\n", + " | Item_Identifier | \n", + "Item_Weight | \n", + "Item_Fat_Content | \n", + "Item_Visibility | \n", + "Item_Type | \n", + "Item_MRP | \n", + "Outlet_Identifier | \n", + "Outlet_Establishment_Year | \n", + "Outlet_Size | \n", + "Outlet_Location_Type | \n", + "Outlet_Type | \n", + "Item_Outlet_Sales | \n", + "source | \n", + "Outlet_Years | \n", + "New_Item_type | \n", + "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", + "FDA15 | \n", + "9.300000 | \n", + "Low Fat | \n", + "0.016047 | \n", + "Dairy | \n", + "249.8092 | \n", + "OUT049 | \n", + "1999 | \n", + "Medium | \n", + "Tier 1 | \n", + "Supermarket Type1 | \n", + "3735.1380 | \n", + "train | \n", + "14 | \n", + "Food | \n", + "
1 | \n", + "DRC01 | \n", + "5.920000 | \n", + "Regular | \n", + "0.019278 | \n", + "Soft Drinks | \n", + "48.2692 | \n", + "OUT018 | \n", + "2009 | \n", + "Medium | \n", + "Tier 3 | \n", + "Supermarket Type2 | \n", + "443.4228 | \n", + "train | \n", + "4 | \n", + "Drinks | \n", + "
2 | \n", + "FDN15 | \n", + "17.500000 | \n", + "Low Fat | \n", + "0.016760 | \n", + "Meat | \n", + "141.6180 | \n", + "OUT049 | \n", + "1999 | \n", + "Medium | \n", + "Tier 1 | \n", + "Supermarket Type1 | \n", + "2097.2700 | \n", + "train | \n", + "14 | \n", + "Food | \n", + "
3 | \n", + "FDX07 | \n", + "19.200000 | \n", + "Regular | \n", + "0.000000 | \n", + "Fruits and Vegetables | \n", + "182.0950 | \n", + "OUT010 | \n", + "1998 | \n", + "Medium | \n", + "Tier 3 | \n", + "Grocery Store | \n", + "732.3800 | \n", + "train | \n", + "15 | \n", + "Food | \n", + "
4 | \n", + "NCD19 | \n", + "8.930000 | \n", + "Non-Edible | \n", + "0.000000 | \n", + "Household | \n", + "53.8614 | \n", + "OUT013 | \n", + "1987 | \n", + "High | \n", + "Tier 3 | \n", + "Supermarket Type1 | \n", + "994.7052 | \n", + "train | \n", + "26 | \n", + "Non-Consumable | \n", + "
5 | \n", + "FDP36 | \n", + "10.395000 | \n", + "Regular | \n", + "0.000000 | \n", + "Baking Goods | \n", + "51.4008 | \n", + "OUT018 | \n", + "2009 | \n", + "Medium | \n", + "Tier 3 | \n", + "Supermarket Type2 | \n", + "556.6088 | \n", + "train | \n", + "4 | \n", + "Food | \n", + "
6 | \n", + "FDO10 | \n", + "13.650000 | \n", + "Regular | \n", + "0.012741 | \n", + "Snack Foods | \n", + "57.6588 | \n", + "OUT013 | \n", + "1987 | \n", + "High | \n", + "Tier 3 | \n", + "Supermarket Type1 | \n", + "343.5528 | \n", + "train | \n", + "26 | \n", + "Food | \n", + "
7 | \n", + "FDP10 | \n", + "12.792854 | \n", + "Low Fat | \n", + "0.127470 | \n", + "Snack Foods | \n", + "107.7622 | \n", + "OUT027 | \n", + "1985 | \n", + "Medium | \n", + "Tier 3 | \n", + "Supermarket Type3 | \n", + "4022.7636 | \n", + "train | \n", + "28 | \n", + "Food | \n", + "
8 | \n", + "FDH17 | \n", + "16.200000 | \n", + "Regular | \n", + "0.016687 | \n", + "Frozen Foods | \n", + "96.9726 | \n", + "OUT045 | \n", + "2002 | \n", + "Medium | \n", + "Tier 2 | \n", + "Supermarket Type1 | \n", + "1076.5986 | \n", + "train | \n", + "11 | \n", + "Food | \n", + "
9 | \n", + "FDU28 | \n", + "19.200000 | \n", + "Regular | \n", + "0.094450 | \n", + "Frozen Foods | \n", + "187.8214 | \n", + "OUT017 | \n", + "2007 | \n", + "Medium | \n", + "Tier 2 | \n", + "Supermarket Type1 | \n", + "4710.5350 | \n", + "train | \n", + "6 | \n", + "Food | \n", + "
\n", + " | Item_Identifier | \n", + "Item_Weight | \n", + "Item_Fat_Content | \n", + "Item_Visibility | \n", + "Item_Type | \n", + "Item_MRP | \n", + "Outlet_Identifier | \n", + "Outlet_Establishment_Year | \n", + "Outlet_Size | \n", + "Outlet_Location_Type | \n", + "Outlet_Type | \n", + "Item_Outlet_Sales | \n", + "source | \n", + "Outlet_Years | \n", + "New_Item_type | \n", + "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", + "156 | \n", + "9.30 | \n", + "0 | \n", + "0.016047 | \n", + "4 | \n", + "249.8092 | \n", + "9 | \n", + "1999 | \n", + "1 | \n", + "0 | \n", + "1 | \n", + "3735.1380 | \n", + "train | \n", + "14 | \n", + "Food | \n", + "
1 | \n", + "8 | \n", + "5.92 | \n", + "2 | \n", + "0.019278 | \n", + "14 | \n", + "48.2692 | \n", + "3 | \n", + "2009 | \n", + "1 | \n", + "2 | \n", + "2 | \n", + "443.4228 | \n", + "train | \n", + "4 | \n", + "Drinks | \n", + "
2 | \n", + "662 | \n", + "17.50 | \n", + "0 | \n", + "0.016760 | \n", + "10 | \n", + "141.6180 | \n", + "9 | \n", + "1999 | \n", + "1 | \n", + "0 | \n", + "1 | \n", + "2097.2700 | \n", + "train | \n", + "14 | \n", + "Food | \n", + "
3 | \n", + "1121 | \n", + "19.20 | \n", + "2 | \n", + "0.000000 | \n", + "6 | \n", + "182.0950 | \n", + "0 | \n", + "1998 | \n", + "1 | \n", + "2 | \n", + "0 | \n", + "732.3800 | \n", + "train | \n", + "15 | \n", + "Food | \n", + "
4 | \n", + "1297 | \n", + "8.93 | \n", + "1 | \n", + "0.000000 | \n", + "9 | \n", + "53.8614 | \n", + "1 | \n", + "1987 | \n", + "0 | \n", + "2 | \n", + "1 | \n", + "994.7052 | \n", + "train | \n", + "26 | \n", + "Non-Consumable | \n", + "
LinearRegression(normalize=True)In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
LinearRegression(normalize=True)
XGBRegressor(base_score=0.5, booster='gbtree', colsample_bylevel=1,\n", + " colsample_bynode=1, colsample_bytree=1, enable_categorical=False,\n", + " gamma=0, gpu_id=-1, importance_type=None,\n", + " interaction_constraints='', learning_rate=0.5, max_delta_step=0,\n", + " max_depth=6, min_child_weight=1, missing=nan,\n", + " monotone_constraints='()', n_estimators=100, n_jobs=8,\n", + " num_parallel_tree=1, predictor='auto', random_state=0, reg_alpha=0,\n", + " reg_lambda=1, scale_pos_weight=1, subsample=1, tree_method='exact',\n", + " validate_parameters=1, verbosity=None)In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
XGBRegressor(base_score=0.5, booster='gbtree', colsample_bylevel=1,\n", + " colsample_bynode=1, colsample_bytree=1, enable_categorical=False,\n", + " gamma=0, gpu_id=-1, importance_type=None,\n", + " interaction_constraints='', learning_rate=0.5, max_delta_step=0,\n", + " max_depth=6, min_child_weight=1, missing=nan,\n", + " monotone_constraints='()', n_estimators=100, n_jobs=8,\n", + " num_parallel_tree=1, predictor='auto', random_state=0, reg_alpha=0,\n", + " reg_lambda=1, scale_pos_weight=1, subsample=1, tree_method='exact',\n", + " validate_parameters=1, verbosity=None)