diff --git a/recommender_system/collaborative/.gitignore b/recommender_system/collaborative/.gitignore new file mode 100644 index 0000000..985e24b --- /dev/null +++ b/recommender_system/collaborative/.gitignore @@ -0,0 +1,3 @@ +*.csv +.ipynb_checkpoints +*.png diff --git a/recommender_system/collaborative/collaborative.ipynb b/recommender_system/collaborative/collaborative.ipynb new file mode 100644 index 0000000..f8f2e1c --- /dev/null +++ b/recommender_system/collaborative/collaborative.ipynb @@ -0,0 +1,2688 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "29b5381a-72b4-4f81-9208-2075f7acad85", + "metadata": {}, + "source": [ + "# Collaborative Recommender System" + ] + }, + { + "cell_type": "markdown", + "id": "cac03d32", + "metadata": {}, + "source": [ + "Configure the project. Indeed you create a dataset in csv format." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "6f480cda-8380-4355-998a-5c59d6203b05", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Archive: ./dataset/archive.zip\n", + " inflating: anime.csv \n", + " inflating: rating.csv \n" + ] + } + ], + "source": [ + "! rm -rf *.csv\n", + "! unzip ./dataset/archive.zip\n" + ] + }, + { + "cell_type": "markdown", + "id": "52ec2f48", + "metadata": {}, + "source": [ + "Import needed libraries" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "dd17f780", + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "from IPython.display import Image\n", + "from scipy.stats import pearsonr\n", + "\n", + "%matplotlib inline" + ] + }, + { + "cell_type": "markdown", + "id": "d836411b", + "metadata": {}, + "source": [ + "### User-Based Collaborative Filtering\n", + "- Approach: Finds users similar to the target user based on historical interactions.\n", + "- Process:\n", + " 1. Identify users with similar preferences.\n", + " 2. 
Recommend items liked by these similar users.\n", + "- Pros:\n", + " - Simple to understand and implement.\n", + " - Often effective with sufficient user data.\n", + "- Cons:\n", + " - Performance degrades with large datasets.\n", + " - Struggles with new users (cold start problem).\n", + "\n", + "### Item-Based Collaborative Filtering\n", + "- Approach: Finds items similar to the ones the target user has interacted with.\n", + "- Process:\n", + " 1. Identify items similar to what the user likes.\n", + " 2. Recommend these similar items.\n", + "- Pros:\n", + " - More scalable with large datasets.\n", + " - Can leverage item characteristics and interactions.\n", + "- Cons:\n", + " - Requires significant item interaction data.\n", + " - Might not capture nuanced user preferences.\n", + "\n", + "Both approaches aim to provide personalized recommendations but differ in their method and scalability." + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "d7b51235", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "url = \"https://www.scaler.com/topics/images/collaborative.webp\"\n", + "Image(url=url)" + ] + }, + { + "cell_type": "markdown", + "id": "162cb46a", + "metadata": {}, + "source": [ + "### I decided to implement User Based Approach to avoid memory issues." + ] + }, + { + "cell_type": "markdown", + "id": "57b33a77", + "metadata": {}, + "source": [ + "Read data from csv files using pandas and store in data frame structure. Also shuffle data to have uniform distribution. 
" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "a102a751", + "metadata": {}, + "outputs": [], + "source": [ + "anime_df = pd.read_csv(\"anime.csv\")\n", + "anime_df = anime_df.sample(frac=1.0, random_state=42).reset_index(drop=True)\n", + "\n", + "rating_df = pd.read_csv(\"rating.csv\")\n", + "rating_df = rating_df.sample(frac=1.0, random_state=42).reset_index(drop=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "e415ec3d", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
anime_idnamegenretypeepisodesratingmembers
017209Suzy's Zoo: Daisuki! Witzy - Happy BirthdayKidsSpecial16.17158
1173TacticsComedy, Drama, Fantasy, Mystery, Shounen, Supe...TV257.3427358
23616Kamen no Maid GuyAction, Comedy, Ecchi, Super PowerTV127.1427761
318799Take Your WayAction, Music, Seinen, SupernaturalMusic16.661387
418831RinkakuDementia, Horror, MusicMusic15.60606
........................
122894638MilkywayHentai, RomanceOVA25.82695
122905272Tondemo Nezumi DaikatsuyakuAdventureMovie16.53252
122911262Macross II: Lovers AgainAdventure, Mecha, Military, Sci-Fi, Shounen, S...OVA66.476760
1229222819Aikatsu! MovieMusic, School, Shoujo, Slice of LifeMovie17.792813
122932364Virus: Virus Buster SergeAction, Adventure, Mecha, Police, Sci-FiTV125.592250
\n", + "

12294 rows × 7 columns

\n", + "
" + ], + "text/plain": [ + " anime_id name \\\n", + "0 17209 Suzy's Zoo: Daisuki! Witzy - Happy Birthday \n", + "1 173 Tactics \n", + "2 3616 Kamen no Maid Guy \n", + "3 18799 Take Your Way \n", + "4 18831 Rinkaku \n", + "... ... ... \n", + "12289 4638 Milkyway \n", + "12290 5272 Tondemo Nezumi Daikatsuyaku \n", + "12291 1262 Macross II: Lovers Again \n", + "12292 22819 Aikatsu! Movie \n", + "12293 2364 Virus: Virus Buster Serge \n", + "\n", + " genre type episodes \\\n", + "0 Kids Special 1 \n", + "1 Comedy, Drama, Fantasy, Mystery, Shounen, Supe... TV 25 \n", + "2 Action, Comedy, Ecchi, Super Power TV 12 \n", + "3 Action, Music, Seinen, Supernatural Music 1 \n", + "4 Dementia, Horror, Music Music 1 \n", + "... ... ... ... \n", + "12289 Hentai, Romance OVA 2 \n", + "12290 Adventure Movie 1 \n", + "12291 Adventure, Mecha, Military, Sci-Fi, Shounen, S... OVA 6 \n", + "12292 Music, School, Shoujo, Slice of Life Movie 1 \n", + "12293 Action, Adventure, Mecha, Police, Sci-Fi TV 12 \n", + "\n", + " rating members \n", + "0 6.17 158 \n", + "1 7.34 27358 \n", + "2 7.14 27761 \n", + "3 6.66 1387 \n", + "4 5.60 606 \n", + "... ... ... \n", + "12289 5.82 695 \n", + "12290 6.53 252 \n", + "12291 6.47 6760 \n", + "12292 7.79 2813 \n", + "12293 5.59 2250 \n", + "\n", + "[12294 rows x 7 columns]" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "anime_df" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "255c589d", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
user_idanime_idrating
0592576506
182035917
2153952097
31280670210
49259329988
............
29258052490690659
292580648795184117
292580743226282997
292580862082173976
2925809426353029
\n", + "

2925810 rows × 3 columns

\n", + "
" + ], + "text/plain": [ + " user_id anime_id rating\n", + "0 59257 650 6\n", + "1 8203 591 7\n", + "2 15395 209 7\n", + "3 1280 6702 10\n", + "4 9259 32998 8\n", + "... ... ... ...\n", + "2925805 24906 9065 9\n", + "2925806 48795 18411 7\n", + "2925807 43226 28299 7\n", + "2925808 62082 17397 6\n", + "2925809 42635 302 9\n", + "\n", + "[2925810 rows x 3 columns]" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "rating_df" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "1ff09818", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "rating\n", + "-1 625635\n", + " 7 570953\n", + " 8 567465\n", + " 9 362429\n", + " 6 303423\n", + " 10 253867\n", + " 5 144328\n", + " 4 53456\n", + " 3 22105\n", + " 2 12758\n", + " 1 9391\n", + "Name: count, dtype: int64" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "rating_df['rating'].value_counts()" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "47140075", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " Title Rating\n", + "0 Boku dake ga Inai Machi 10.0\n", + "1 Violet Evergarden 9.5\n", + "2 Goblin Slayer 6.0\n", + "3 Berserk 8.0\n", + "4 Shingeki no Kyojin 7.0\n", + "5 Tokyo Ghoul 6.5\n", + "6 Orange 6.0\n", + "7 Death Parade 8.0\n", + "8 Death Note 7.5\n", + "9 Bungou Stray Dogs 7.5\n", + "10 Tenki no Ko 8.0\n", + "11 Kimi no Na wa. 8.0\n", + "12 Kimi no Suizou wo Tabetai 8.5\n", + "13 Mononoke Hime 7.5\n", + "14 Sen to Chihiro no Kamikakushi 7.5\n", + "15 Koe no Katachi 8.5\n", + "16 Ao Haru Ride 5.5\n", + "17 Toki wo Kakeru Shoujo 7.0\n", + "18 Another 7.5\n", + "19 Kimetsu no Yaiba 7.0\n", + "20 Shigatsu wa Kimi no Uso 8.0\n", + "21 Byousoku 5 Centimeter 6.0\n", + "22 Kokoro ga Sakebitagatterunda. 
7.5\n", + "23 Schick x Evangelion 5.0\n" + ] + } + ], + "source": [ + "userInput = [\n", + " {'Title': 'Boku dake ga Inai Machi', 'Rating': 10.0},\n", + " {'Title': 'Violet Evergarden', 'Rating': 9.5},\n", + " {'Title': 'Goblin Slayer', 'Rating': 6.0},\n", + " {'Title': 'Berserk', 'Rating': 8.0},\n", + " {'Title': 'Shingeki no Kyojin', 'Rating': 7.0},\n", + " {'Title': 'Tokyo Ghoul', 'Rating': 6.5},\n", + " {'Title': 'Orange', 'Rating': 6.0},\n", + " {'Title': 'Death Parade', 'Rating': 8.0},\n", + " {'Title': 'Death Note', 'Rating': 7.5},\n", + " {'Title': 'Bungou Stray Dogs', 'Rating': 7.5},\n", + " {'Title': 'Tenki no Ko', 'Rating': 8.0},\n", + " {'Title': 'Kimi no Na wa.', 'Rating': 8.0},\n", + " {'Title': 'Kimi no Suizou wo Tabetai', 'Rating': 8.5},\n", + " {'Title': 'Mononoke Hime', 'Rating': 7.5},\n", + " {'Title': 'Sen to Chihiro no Kamikakushi', 'Rating': 7.5},\n", + " {'Title': 'Koe no Katachi', 'Rating': 8.5},\n", + " {'Title': 'Ao Haru Ride', 'Rating': 5.5},\n", + " {'Title': 'Toki wo Kakeru Shoujo', 'Rating': 7.0},\n", + " {'Title': 'Another', 'Rating': 7.5},\n", + " {'Title': 'Kimetsu no Yaiba', 'Rating': 7.0},\n", + " {'Title': 'Shigatsu wa Kimi no Uso', 'Rating': 8.0},\n", + " {'Title': 'Byousoku 5 Centimeter', 'Rating': 6.0},\n", + " {'Title': 'Kokoro ga Sakebitagatterunda.', 'Rating': 7.5},\n", + " {'Title': 'Schick x Evangelion', 'Rating': 5.0}\n", + "]\n", + "\n", + "inputAnime = pd.DataFrame(userInput)\n", + "print(inputAnime)" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "073f839f", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Index(['anime_id', 'name', 'genre', 'type', 'episodes', 'rating', 'members'], dtype='object')" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "anime_df.columns[:25]" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "891d2a60", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + 
"
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
TitleRatinganime_id
0Boku dake ga Inai Machi10.031043.0
1Violet Evergarden9.533352.0
2Goblin Slayer6.0NaN
3Berserk8.033.0
4Shingeki no Kyojin7.016498.0
5Tokyo Ghoul6.522319.0
6Orange6.032729.0
7Death Parade8.028223.0
8Death Note7.51535.0
9Bungou Stray Dogs7.531478.0
10Tenki no Ko8.0NaN
11Kimi no Na wa.8.032281.0
12Kimi no Suizou wo Tabetai8.5NaN
13Mononoke Hime7.5164.0
14Sen to Chihiro no Kamikakushi7.5199.0
15Koe no Katachi8.528851.0
16Ao Haru Ride5.521995.0
17Toki wo Kakeru Shoujo7.02236.0
18Another7.511111.0
19Kimetsu no Yaiba7.0NaN
20Shigatsu wa Kimi no Uso8.023273.0
21Byousoku 5 Centimeter6.01689.0
22Kokoro ga Sakebitagatterunda.7.528725.0
23Schick x Evangelion5.031115.0
\n", + "
" + ], + "text/plain": [ + " Title Rating anime_id\n", + "0 Boku dake ga Inai Machi 10.0 31043.0\n", + "1 Violet Evergarden 9.5 33352.0\n", + "2 Goblin Slayer 6.0 NaN\n", + "3 Berserk 8.0 33.0\n", + "4 Shingeki no Kyojin 7.0 16498.0\n", + "5 Tokyo Ghoul 6.5 22319.0\n", + "6 Orange 6.0 32729.0\n", + "7 Death Parade 8.0 28223.0\n", + "8 Death Note 7.5 1535.0\n", + "9 Bungou Stray Dogs 7.5 31478.0\n", + "10 Tenki no Ko 8.0 NaN\n", + "11 Kimi no Na wa. 8.0 32281.0\n", + "12 Kimi no Suizou wo Tabetai 8.5 NaN\n", + "13 Mononoke Hime 7.5 164.0\n", + "14 Sen to Chihiro no Kamikakushi 7.5 199.0\n", + "15 Koe no Katachi 8.5 28851.0\n", + "16 Ao Haru Ride 5.5 21995.0\n", + "17 Toki wo Kakeru Shoujo 7.0 2236.0\n", + "18 Another 7.5 11111.0\n", + "19 Kimetsu no Yaiba 7.0 NaN\n", + "20 Shigatsu wa Kimi no Uso 8.0 23273.0\n", + "21 Byousoku 5 Centimeter 6.0 1689.0\n", + "22 Kokoro ga Sakebitagatterunda. 7.5 28725.0\n", + "23 Schick x Evangelion 5.0 31115.0" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "inputAnime = pd.merge(inputAnime, anime_df[['anime_id', 'name']], how='left', left_on='Title', right_on='name')\n", + "inputAnime = inputAnime.drop(columns='name')\n", + "inputAnime" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "86c71ba9", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
TitleRatinganime_id
0Boku dake ga Inai Machi10.031043.0
1Violet Evergarden9.533352.0
2Berserk8.033.0
3Shingeki no Kyojin7.016498.0
4Tokyo Ghoul6.522319.0
5Orange6.032729.0
6Death Parade8.028223.0
7Death Note7.51535.0
8Bungou Stray Dogs7.531478.0
9Kimi no Na wa.8.032281.0
10Mononoke Hime7.5164.0
11Sen to Chihiro no Kamikakushi7.5199.0
12Koe no Katachi8.528851.0
13Ao Haru Ride5.521995.0
14Toki wo Kakeru Shoujo7.02236.0
15Another7.511111.0
16Shigatsu wa Kimi no Uso8.023273.0
17Byousoku 5 Centimeter6.01689.0
18Kokoro ga Sakebitagatterunda.7.528725.0
19Schick x Evangelion5.031115.0
\n",
+       "
"
+      ],
+      "text/plain": [
+       " Title Rating anime_id\n",
+       "0 Boku dake ga Inai Machi 10.0 31043.0\n",
+       "1 Violet Evergarden 9.5 33352.0\n",
+       "2 Berserk 8.0 33.0\n",
+       "3 Shingeki no Kyojin 7.0 16498.0\n",
+       "4 Tokyo Ghoul 6.5 22319.0\n",
+       "5 Orange 6.0 32729.0\n",
+       "6 Death Parade 8.0 28223.0\n",
+       "7 Death Note 7.5 1535.0\n",
+       "8 Bungou Stray Dogs 7.5 31478.0\n",
+       "9 Kimi no Na wa. 8.0 32281.0\n",
+       "10 Mononoke Hime 7.5 164.0\n",
+       "11 Sen to Chihiro no Kamikakushi 7.5 199.0\n",
+       "12 Koe no Katachi 8.5 28851.0\n",
+       "13 Ao Haru Ride 5.5 21995.0\n",
+       "14 Toki wo Kakeru Shoujo 7.0 2236.0\n",
+       "15 Another 7.5 11111.0\n",
+       "16 Shigatsu wa Kimi no Uso 8.0 23273.0\n",
+       "17 Byousoku 5 Centimeter 6.0 1689.0\n",
+       "18 Kokoro ga Sakebitagatterunda. 7.5 28725.0\n",
+       "19 Schick x Evangelion 5.0 31115.0"
+      ]
+     },
+     "execution_count": 11,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Discard the titles whose anime_id is missing, then renumber the remaining rows from 0.\n",
+    "inputAnime = inputAnime.dropna(subset=['anime_id']).reset_index(drop=True)\n",
+    "\n",
+    "inputAnime"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "id": "6c203367",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
user_idanime_idrating
2206853232739
40210089219958
60135393310
760289553228110
762239822319-1
............
2925331121153510
29254581256153510
292555614421648
29256699625219958
292568536521649
\n", + "

17078 rows × 3 columns

\n",
+       "
"
+      ],
+      "text/plain": [
+       " user_id anime_id rating\n",
+       "220 6853 23273 9\n",
+       "402 10089 21995 8\n",
+       "601 3539 33 10\n",
+       "760 28955 32281 10\n",
+       "762 2398 22319 -1\n",
+       "... ... ... ...\n",
+       "2925331 121 1535 10\n",
+       "2925458 1256 1535 10\n",
+       "2925556 1442 164 8\n",
+       "2925669 9625 21995 8\n",
+       "2925685 3652 164 9\n",
+       "\n",
+       "[17078 rows x 3 columns]"
+      ]
+     },
+     "execution_count": 12,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Keep only the rating rows that concern a title present in inputAnime.\n",
+    "ratedByTarget = rating_df['anime_id'].isin(inputAnime['anime_id'].tolist())\n",
+    "userSubset = rating_df.loc[ratedByTarget]\n",
+    "userSubset"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "id": "dd719111",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
user_idcountsanime_ids
55268715[21995, 2236, 1689, 22319, 1535, 164, 28725, 2...
920114514[16498, 31043, 21995, 28725, 2236, 31478, 2327...
2479333814[28851, 32281, 31478, 22319, 21995, 2236, 1689...
62578414[28223, 22319, 1535, 32729, 21995, 31043, 1649...
60676013[22319, 28223, 31043, 31478, 199, 32729, 1535,...
............
7082680911[28725]
7083681771[28725]
7084683201[28725]
7085684051[28725]
7086685591[28725]
\n", + "

7133 rows × 3 columns

\n",
+       "
"
+      ],
+      "text/plain": [
+       " user_id counts anime_ids\n",
+       "552 687 15 [21995, 2236, 1689, 22319, 1535, 164, 28725, 2...\n",
+       "920 1145 14 [16498, 31043, 21995, 28725, 2236, 31478, 2327...\n",
+       "2479 3338 14 [28851, 32281, 31478, 22319, 21995, 2236, 1689...\n",
+       "625 784 14 [28223, 22319, 1535, 32729, 21995, 31043, 1649...\n",
+       "606 760 13 [22319, 28223, 31043, 31478, 199, 32729, 1535,...\n",
+       "... ... ... ...\n",
+       "7082 68091 1 [28725]\n",
+       "7083 68177 1 [28725]\n",
+       "7084 68320 1 [28725]\n",
+       "7085 68405 1 [28725]\n",
+       "7086 68559 1 [28725]\n",
+       "\n",
+       "[7133 rows x 3 columns]"
+      ]
+     },
+     "execution_count": 13,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Per user: how many rows of userSubset they have and which anime_ids those rows cover,\n",
+    "# ordered so the users with the most rows come first.\n",
+    "userSubsetGroup = (\n",
+    "    userSubset\n",
+    "    .groupby('user_id')\n",
+    "    .agg(\n",
+    "        counts=pd.NamedAgg(column='anime_id', aggfunc='count'),\n",
+    "        anime_ids=pd.NamedAgg(column='anime_id', aggfunc=list),\n",
+    "    )\n",
+    "    .reset_index()\n",
+    "    .sort_values(by='counts', ascending=False)\n",
+    ")\n",
+    "userSubsetGroup"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "id": "637a3307",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
user_idcountsanime_ids
55268715[21995, 2236, 1689, 22319, 1535, 164, 28725, 2...
920114514[16498, 31043, 21995, 28725, 2236, 31478, 2327...
2479333814[28851, 32281, 31478, 22319, 21995, 2236, 1689...
62578414[28223, 22319, 1535, 32729, 21995, 31043, 1649...
60676013[22319, 28223, 31043, 31478, 199, 32729, 1535,...
............
960119710[1689, 16498, 21995, 28725, 28223, 23273, 3147...
1519187010[21995, 164, 31478, 31043, 28223, 22319, 11111...
1530188910[21995, 23273, 11111, 199, 33, 31043, 164, 168...
1752224310[21995, 32729, 1689, 28223, 22319, 11111, 3228...
1495183710[28223, 22319, 31043, 164, 2236, 199, 23273, 2...
\n", + "

97 rows × 3 columns

\n", + "
" + ], + "text/plain": [ + " user_id counts anime_ids\n", + "552 687 15 [21995, 2236, 1689, 22319, 1535, 164, 28725, 2...\n", + "920 1145 14 [16498, 31043, 21995, 28725, 2236, 31478, 2327...\n", + "2479 3338 14 [28851, 32281, 31478, 22319, 21995, 2236, 1689...\n", + "625 784 14 [28223, 22319, 1535, 32729, 21995, 31043, 1649...\n", + "606 760 13 [22319, 28223, 31043, 31478, 199, 32729, 1535,...\n", + "... ... ... ...\n", + "960 1197 10 [1689, 16498, 21995, 28725, 28223, 23273, 3147...\n", + "1519 1870 10 [21995, 164, 31478, 31043, 28223, 22319, 11111...\n", + "1530 1889 10 [21995, 23273, 11111, 199, 33, 31043, 164, 168...\n", + "1752 2243 10 [21995, 32729, 1689, 28223, 22319, 11111, 3228...\n", + "1495 1837 10 [28223, 22319, 31043, 164, 2236, 199, 23273, 2...\n", + "\n", + "[97 rows x 3 columns]" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "userSubsetGroup = userSubsetGroup[userSubsetGroup['counts'] >= 10]\n", + "userSubsetGroup" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "4d44029f", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "User ID: 687, Pearson Correlation: -0.23953974895397492\n", + "User ID: 1145, Pearson Correlation: -0.018181818181818202\n", + "User ID: 3338, Pearson Correlation: 0.18984771573604062\n", + "User ID: 784, Pearson Correlation: 0.14410480349344976\n", + "User ID: 760, Pearson Correlation: -0.33180428134556567\n", + "User ID: 392, Pearson Correlation: 0.12532981530343007\n", + "User ID: 446, Pearson Correlation: -0.6027397260273972\n", + "User ID: 17, Pearson Correlation: -0.4708029197080291\n", + "User ID: 342, Pearson Correlation: 0.10817941952506599\n", + "User ID: 786, Pearson Correlation: -0.0633245382585752\n", + "User ID: 1497, Pearson Correlation: 0.294811320754717\n", + "User ID: 963, Pearson Correlation: 0.08401084010840111\n", + "User ID: 2378, Pearson Correlation: 0.3827751196172249\n", + 
"User ID: 813, Pearson Correlation: -0.1868131868131868\n", + "User ID: 958, Pearson Correlation: -0.3411764705882353\n", + "User ID: 938, Pearson Correlation: 0.6970873786407766\n", + "User ID: 198, Pearson Correlation: 0.1\n", + "User ID: 562, Pearson Correlation: 0.1047957371225577\n", + "User ID: 1013, Pearson Correlation: 0.6026490066225164\n", + "User ID: 1597, Pearson Correlation: -0.2529411764705883\n", + "User ID: 3299, Pearson Correlation: -0.10982658959537578\n", + "User ID: 1222, Pearson Correlation: -0.03311258278145697\n", + "User ID: 123, Pearson Correlation: 0.15761589403973508\n", + "User ID: 77, Pearson Correlation: 0.10773899848254934\n", + "User ID: 1378, Pearson Correlation: 0.2\n", + "User ID: 1344, Pearson Correlation: 0.039999999999999994\n", + "User ID: 1327, Pearson Correlation: -0.3511111111111111\n", + "User ID: 1813, Pearson Correlation: -0.041176470588235294\n", + "User ID: 271, Pearson Correlation: 0.536082474226804\n", + "User ID: 1290, Pearson Correlation: 0.05962521294718907\n", + "User ID: 1019, Pearson Correlation: 0.31746031746031733\n", + "User ID: 478, Pearson Correlation: 0.3470588235294118\n", + "User ID: 2864, Pearson Correlation: -0.19111111111111112\n", + "User ID: 1456, Pearson Correlation: 0.27472527472527475\n", + "User ID: 1274, Pearson Correlation: -0.1502145922746781\n", + "User ID: 651, Pearson Correlation: 0.10724637681159421\n", + "User ID: 611, Pearson Correlation: -0.33333333333333337\n", + "User ID: 2318, Pearson Correlation: 0.09130434782608696\n", + "User ID: 1176, Pearson Correlation: 0.6577777777777776\n", + "User ID: 2200, Pearson Correlation: -0.6019417475728154\n", + "User ID: 585, Pearson Correlation: 1.7094345118926147e-18\n", + "User ID: 2016, Pearson Correlation: 0.35069444444444436\n", + "User ID: 1394, Pearson Correlation: -0.021428571428571443\n", + "User ID: 1116, Pearson Correlation: -0.03125000000000002\n", + "User ID: 1620, Pearson Correlation: -0.12780898876404492\n", + "User ID: 1435, 
Pearson Correlation: 0.3058252427184466\n", + "User ID: 1442, Pearson Correlation: -0.0298804780876494\n", + "User ID: 395, Pearson Correlation: 0.06382978723404255\n", + "User ID: 3284, Pearson Correlation: 0.28864353312302843\n", + "User ID: 3360, Pearson Correlation: -0.04532163742690057\n", + "User ID: 2951, Pearson Correlation: -0.8857142857142858\n", + "User ID: 1418, Pearson Correlation: -0.35714285714285704\n", + "User ID: 2555, Pearson Correlation: 0.017182130584192438\n", + "User ID: 2867, Pearson Correlation: 0.032407407407407406\n", + "User ID: 79, Pearson Correlation: -0.02325581395348838\n", + "User ID: 139, Pearson Correlation: -0.028037383177570086\n", + "User ID: 1815, Pearson Correlation: -0.20833333333333337\n", + "User ID: 1814, Pearson Correlation: -0.3707165109034268\n", + "User ID: 1918, Pearson Correlation: 0.05241935483870963\n", + "User ID: 2256, Pearson Correlation: -0.4411247803163445\n", + "User ID: 228, Pearson Correlation: -0.2588652482269503\n", + "User ID: 565, Pearson Correlation: 0.024822695035461\n", + "User ID: 744, Pearson Correlation: 0.45895522388059706\n", + "User ID: 4102, Pearson Correlation: -0.6903409090909091\n", + "User ID: 1501, Pearson Correlation: -0.16465863453815263\n", + "User ID: 1504, Pearson Correlation: -0.5151515151515152\n", + "User ID: 1343, Pearson Correlation: 0.019607843137254957\n", + "User ID: 4565, Pearson Correlation: 0.13043478260869568\n", + "User ID: 861, Pearson Correlation: -0.44\n", + "User ID: 2202, Pearson Correlation: -0.023454157782515993\n", + "User ID: 553, Pearson Correlation: 0.23809523809523808\n", + "User ID: 4512, Pearson Correlation: 0.13043478260869568\n", + "User ID: 1237, Pearson Correlation: -0.5329768270944742\n", + "User ID: 1252, Pearson Correlation: -0.044176706827309245\n", + "User ID: 1284, Pearson Correlation: -0.02083333333333333\n", + "User ID: 926, Pearson Correlation: -0.11475409836065573\n", + "User ID: 995, Pearson Correlation: -0.40909090909090906\n", + "User ID: 
1419, Pearson Correlation: 0.8333333333333333\n",
+      "User ID: 1579, Pearson Correlation: -0.06339468302658491\n",
+      "User ID: 1578, Pearson Correlation: -0.045871559633027505\n",
+      "User ID: 244, Pearson Correlation: -0.20567375886524816\n",
+      "User ID: 1711, Pearson Correlation: -0.17021276595744678\n",
+      "User ID: 1400, Pearson Correlation: 0.483065953654189\n",
+      "User ID: 894, Pearson Correlation: -0.018518518518518556\n",
+      "User ID: 2025, Pearson Correlation: 0.3023255813953489\n",
+      "User ID: 598, Pearson Correlation: -0.22340425531914884\n",
+      "User ID: 1023, Pearson Correlation: 0.21985815602836875\n",
+      "User ID: 2273, Pearson Correlation: 0.09274193548387095\n",
+      "User ID: 1605, Pearson Correlation: -0.4262820512820513\n",
+      "User ID: 1963, Pearson Correlation: 0.3055555555555556\n",
+      "User ID: 1167, Pearson Correlation: -0.2121212121212121\n",
+      "User ID: 444, Pearson Correlation: 0.45512820512820523\n",
+      "User ID: 1197, Pearson Correlation: -0.5261044176706828\n",
+      "User ID: 1870, Pearson Correlation: -0.3750000000000001\n",
+      "User ID: 1889, Pearson Correlation: -0.2126537785588753\n",
+      "User ID: 2243, Pearson Correlation: 0.0657439446366782\n",
+      "User ID: 1837, Pearson Correlation: -0.22699386503067487\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Ratings given by the target user, keyed by anime_id.\n",
+    "animeRating_dict = inputAnime.set_index('anime_id')['Rating'].to_dict()\n",
+    "\n",
+    "# Ratings the candidate users themselves gave to those same titles.\n",
+    "# -1 lies outside the 1-10 scale (it looks like a 'watched, not rated' marker -\n",
+    "# TODO confirm against the dataset description), so it is not treated as a score.\n",
+    "ratedSubset = userSubset[\n",
+    "    userSubset['user_id'].isin(userSubsetGroup['user_id'])\n",
+    "    & (userSubset['rating'] != -1)\n",
+    "]\n",
+    "candidateRatings = {\n",
+    "    user_id: dict(zip(group['anime_id'].tolist(), group['rating'].tolist()))\n",
+    "    for user_id, group in ratedSubset.groupby('user_id')\n",
+    "}\n",
+    "\n",
+    "pearsonCorrelation_dict = {}\n",
+    "\n",
+    "for user_id in userSubsetGroup['user_id'].tolist():\n",
+    "    their_ratings = candidateRatings.get(user_id, {})\n",
+    "\n",
+    "    # Pair both users' ratings title by title so the two vectors stay aligned.\n",
+    "    shared_ids = [anime_id for anime_id in animeRating_dict if anime_id in their_ratings]\n",
+    "    input_ratings = [animeRating_dict[anime_id] for anime_id in shared_ids]\n",
+    "    user_ratings = [their_ratings[anime_id] for anime_id in shared_ids]\n",
+    "\n",
+    "    # pearsonr needs at least two points and is undefined when either side is\n",
+    "    # constant; record 0 (no evidence of similarity) in those cases.\n",
+    "    if len(shared_ids) >= 2 and len(set(input_ratings)) > 1 and len(set(user_ratings)) > 1:\n",
+    "        correlation, _ = pearsonr(user_ratings, input_ratings)\n",
+    "        pearsonCorrelation_dict[user_id] = correlation\n",
+    "    else:\n",
+    "        pearsonCorrelation_dict[user_id] = 0\n",
+    "\n",
+    "# Display the results\n",
+    "for user_id, correlation in pearsonCorrelation_dict.items():\n",
+    "    print(f\"User ID: {user_id}, Pearson Correlation: {correlation}\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 16,
+   "id": "5c852fca",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Similarity Indexuser_id
0-0.239540687
1-0.0181821145
20.1898483338
30.144105784
4-0.331804760
.........
92-0.5261041197
93-0.3750001870
94-0.2126541889
950.0657442243
96-0.2269941837
\n", + "

97 rows × 2 columns

\n", + "
" + ], + "text/plain": [ + " Similarity Index user_id\n", + "0 -0.239540 687\n", + "1 -0.018182 1145\n", + "2 0.189848 3338\n", + "3 0.144105 784\n", + "4 -0.331804 760\n", + ".. ... ...\n", + "92 -0.526104 1197\n", + "93 -0.375000 1870\n", + "94 -0.212654 1889\n", + "95 0.065744 2243\n", + "96 -0.226994 1837\n", + "\n", + "[97 rows x 2 columns]" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pearsonDF = pd.DataFrame.from_dict(pearsonCorrelation_dict, orient='index')\n", + "pearsonDF.columns = ['Similarity Index']\n", + "pearsonDF['user_id'] = pearsonDF.index\n", + "pearsonDF.index = range(len(pearsonDF))\n", + "pearsonDF" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "5e86597a", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Similarity Indexuser_id
770.8333331419
150.697087938
380.6577781176
180.6026491013
280.536082271
820.4830661400
620.458955744
910.455128444
120.3827752378
410.3506942016
310.347059478
300.3174601019
450.3058251435
890.3055561963
840.3023262025
100.2948111497
480.2886443284
330.2747251456
700.238095553
860.2198581023
240.2000001378
20.1898483338
220.157616123
30.144105784
670.1304354565
710.1304354512
50.125330392
80.108179342
230.10773977
350.107246651
170.104796562
160.100000198
870.0927422273
370.0913042318
110.084011963
950.0657442243
470.063830395
290.0596251290
580.0524191918
250.0400001344
\n", + "
" + ], + "text/plain": [ + " Similarity Index user_id\n", + "77 0.833333 1419\n", + "15 0.697087 938\n", + "38 0.657778 1176\n", + "18 0.602649 1013\n", + "28 0.536082 271\n", + "82 0.483066 1400\n", + "62 0.458955 744\n", + "91 0.455128 444\n", + "12 0.382775 2378\n", + "41 0.350694 2016\n", + "31 0.347059 478\n", + "30 0.317460 1019\n", + "45 0.305825 1435\n", + "89 0.305556 1963\n", + "84 0.302326 2025\n", + "10 0.294811 1497\n", + "48 0.288644 3284\n", + "33 0.274725 1456\n", + "70 0.238095 553\n", + "86 0.219858 1023\n", + "24 0.200000 1378\n", + "2 0.189848 3338\n", + "22 0.157616 123\n", + "3 0.144105 784\n", + "67 0.130435 4565\n", + "71 0.130435 4512\n", + "5 0.125330 392\n", + "8 0.108179 342\n", + "23 0.107739 77\n", + "35 0.107246 651\n", + "17 0.104796 562\n", + "16 0.100000 198\n", + "87 0.092742 2273\n", + "37 0.091304 2318\n", + "11 0.084011 963\n", + "95 0.065744 2243\n", + "47 0.063830 395\n", + "29 0.059625 1290\n", + "58 0.052419 1918\n", + "25 0.040000 1344" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "topUsers = pearsonDF.sort_values(by='Similarity Index', ascending=False)[0:40]\n", + "topUsers" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "82c7d302", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Similarity Indexuser_idanime_idrating
00.83333314192331910
10.83333314193104310
20.8333331419290410
30.8333331419133919
40.83333314191418910
...............
181810.0400001344239-1
181820.0400001344304139
181830.04000013449863-1
181840.040000134490417
181850.04000013443654-1
\n", + "

18186 rows × 4 columns

\n", + "
" + ], + "text/plain": [ + " Similarity Index user_id anime_id rating\n", + "0 0.833333 1419 23319 10\n", + "1 0.833333 1419 31043 10\n", + "2 0.833333 1419 2904 10\n", + "3 0.833333 1419 13391 9\n", + "4 0.833333 1419 14189 10\n", + "... ... ... ... ...\n", + "18181 0.040000 1344 239 -1\n", + "18182 0.040000 1344 30413 9\n", + "18183 0.040000 1344 9863 -1\n", + "18184 0.040000 1344 9041 7\n", + "18185 0.040000 1344 3654 -1\n", + "\n", + "[18186 rows x 4 columns]" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "topUsersRating=topUsers.merge(rating_df, left_on='user_id', right_on='user_id', how='inner')\n", + "topUsersRating" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "8d7f5f46", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "topUsersRating['weighted Rating'] = topUsersRating['Similarity Index']*topUsersRating['rating']\n", + "topUsersRating.head" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "id": "461da216", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sum of similarity Indexsum of weighted Rating
anime_id
12.94441919.664045
50.8458585.965146
61.2687086.559716
71.0037688.823478
151.0125946.341339
.........
340850.0927420.649194
341030.4681512.845150
341360.0400000.320000
341730.274725-0.274725
342403.38889729.199424
\n", + "

3503 rows × 2 columns

\n", + "
" + ], + "text/plain": [ + " sum of similarity Index sum of weighted Rating\n", + "anime_id \n", + "1 2.944419 19.664045\n", + "5 0.845858 5.965146\n", + "6 1.268708 6.559716\n", + "7 1.003768 8.823478\n", + "15 1.012594 6.341339\n", + "... ... ...\n", + "34085 0.092742 0.649194\n", + "34103 0.468151 2.845150\n", + "34136 0.040000 0.320000\n", + "34173 0.274725 -0.274725\n", + "34240 3.388897 29.199424\n", + "\n", + "[3503 rows x 2 columns]" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "tempTopUsersRating = topUsersRating.groupby('anime_id').sum()[['Similarity Index','weighted Rating']]\n", + "tempTopUsersRating.columns = ['sum of similarity Index','sum of weighted Rating']\n", + "tempTopUsersRating" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "id": "b87e3678", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
weighted average recommendation scoreanime_id
anime_id
16.6784131
57.0521865
65.1703896
78.7903557
156.26246815
\n", + "
" + ], + "text/plain": [ + " weighted average recommendation score anime_id\n", + "anime_id \n", + "1 6.678413 1\n", + "5 7.052186 5\n", + "6 5.170389 6\n", + "7 8.790355 7\n", + "15 6.262468 15" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "recommendation_df = pd.DataFrame()\n", + "recommendation_df['weighted average recommendation score'] = tempTopUsersRating['sum of weighted Rating']/tempTopUsersRating['sum of similarity Index']\n", + "recommendation_df['anime_id'] = tempTopUsersRating.index\n", + "recommendation_df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "id": "60a0e629", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
weighted average recommendation scoreanime_id
anime_id
324710.03247
1015310.010153
2847910.028479
2868310.028683
44110.0441
44610.0446
282510.02825
44910.0449
1614310.016143
156610.01566
\n", + "
" + ], + "text/plain": [ + " weighted average recommendation score anime_id\n", + "anime_id \n", + "3247 10.0 3247\n", + "10153 10.0 10153\n", + "28479 10.0 28479\n", + "28683 10.0 28683\n", + "441 10.0 441\n", + "446 10.0 446\n", + "2825 10.0 2825\n", + "449 10.0 449\n", + "16143 10.0 16143\n", + "1566 10.0 1566" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "recommendation_df = recommendation_df.sort_values(by='weighted average recommendation score', ascending=False)\n", + "recommendation_df.head(10)" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "id": "1dc56a93", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
anime_idnamegenretypeepisodesratingmembers
2839449InuYasha: Guren no HouraijimaAdventure, Comedy, Demons, Drama, Historical, ...Movie17.6250008
314116143One Piece: Kinkyuu Kikaku One Piece Kanzen Kou...Adventure, Comedy, Fantasy, ShounenSpecial17.365914
679810153Mahou Shoujo Lyrical Nanoha: The Movie 2nd A&#...Action, Comedy, Drama, Magic, Super PowerMovie18.3413315
689528683One Piece: Episode of Alabasta - PrologueAction, Adventure, Fantasy, ShounenOVA17.414225
7325441Shoujo Kakumei Utena: Adolescence MokushirokuDementia, Drama, Fantasy, Romance, ShoujoMovie17.5922219
7968446Weiß Kreuz GlühenAction, Drama, ShounenTV136.727043
85283247Love Hina Final SelectionComedy, Ecchi, Harem, RomanceOVA17.3221824
86361566Ghost in the Shell: Stand Alone Complex - Soli...Mecha, Military, Mystery, Police, Sci-Fi, SeinenSpecial18.2255247
1157428479Detective Conan Movie 19: The Hellfire SunflowersAction, Mystery, Police, ShounenMovie17.778600
122742825Arabian Nights: Sindbad no Bouken (TV)Adventure, Fantasy, Magic, RomanceTV527.262631
\n", + "
" + ], + "text/plain": [ + " anime_id name \\\n", + "2839 449 InuYasha: Guren no Houraijima \n", + "3141 16143 One Piece: Kinkyuu Kikaku One Piece Kanzen Kou... \n", + "6798 10153 Mahou Shoujo Lyrical Nanoha: The Movie 2nd A&#... \n", + "6895 28683 One Piece: Episode of Alabasta - Prologue \n", + "7325 441 Shoujo Kakumei Utena: Adolescence Mokushiroku \n", + "7968 446 Weiß Kreuz Glühen \n", + "8528 3247 Love Hina Final Selection \n", + "8636 1566 Ghost in the Shell: Stand Alone Complex - Soli... \n", + "11574 28479 Detective Conan Movie 19: The Hellfire Sunflowers \n", + "12274 2825 Arabian Nights: Sindbad no Bouken (TV) \n", + "\n", + " genre type episodes \\\n", + "2839 Adventure, Comedy, Demons, Drama, Historical, ... Movie 1 \n", + "3141 Adventure, Comedy, Fantasy, Shounen Special 1 \n", + "6798 Action, Comedy, Drama, Magic, Super Power Movie 1 \n", + "6895 Action, Adventure, Fantasy, Shounen OVA 1 \n", + "7325 Dementia, Drama, Fantasy, Romance, Shoujo Movie 1 \n", + "7968 Action, Drama, Shounen TV 13 \n", + "8528 Comedy, Ecchi, Harem, Romance OVA 1 \n", + "8636 Mecha, Military, Mystery, Police, Sci-Fi, Seinen Special 1 \n", + "11574 Action, Mystery, Police, Shounen Movie 1 \n", + "12274 Adventure, Fantasy, Magic, Romance TV 52 \n", + "\n", + " rating members \n", + "2839 7.62 50008 \n", + "3141 7.36 5914 \n", + "6798 8.34 13315 \n", + "6895 7.41 4225 \n", + "7325 7.59 22219 \n", + "7968 6.72 7043 \n", + "8528 7.32 21824 \n", + "8636 8.22 55247 \n", + "11574 7.77 8600 \n", + "12274 7.26 2631 " + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "anime_df.loc[anime_df['anime_id'].isin(recommendation_df.head(10)['anime_id'].tolist())]\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + 
"name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.2" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/recommender_system/collaborative/dataset/archive.zip b/recommender_system/collaborative/dataset/archive.zip new file mode 100644 index 0000000..0baead8 Binary files /dev/null and b/recommender_system/collaborative/dataset/archive.zip differ diff --git a/recommender_system/content_based/.gitignore b/recommender_system/content_based/.gitignore new file mode 100644 index 0000000..985e24b --- /dev/null +++ b/recommender_system/content_based/.gitignore @@ -0,0 +1,3 @@ +*.csv +.ipynb_checkpoints +*.png diff --git a/recommender_system/content_based/content_based.ipynb b/recommender_system/content_based/content_based.ipynb new file mode 100644 index 0000000..2dc8a7d --- /dev/null +++ b/recommender_system/content_based/content_based.ipynb @@ -0,0 +1,4663 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "29b5381a-72b4-4f81-9208-2075f7acad85", + "metadata": {}, + "source": [ + "# Content Based Recommender System" + ] + }, + { + "cell_type": "markdown", + "id": "cac03d32", + "metadata": {}, + "source": [ + "Configure the project. Indeed you create a dataset in csv format." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "6f480cda-8380-4355-998a-5c59d6203b05", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Archive: ./dataset/archive.zip\n", + " inflating: anime.csv \n", + " inflating: rating_complete.csv " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n" + ] + } + ], + "source": [ + "! rm -rf *.csv\n", + "! 
unzip ./dataset/archive.zip" + ] + }, + { + "cell_type": "markdown", + "id": "52ec2f48", + "metadata": {}, + "source": [ + "Import needed libraries" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "dd17f780", + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "\n", + "%matplotlib inline" + ] + }, + { + "cell_type": "markdown", + "id": "57b33a77", + "metadata": {}, + "source": [ + "Read data from csv files using pandas and store in data frame structure. Also shuffle data to have uniform distribution. " + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "a102a751", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
MAL_IDNameScoreGenresEnglish nameJapanese nameTypeEpisodesAiredPremiered...Score-10Score-9Score-8Score-7Score-6Score-5Score-4Score-3Score-2Score-1
040176Miru Tights: Cosplay Satsuei Tights6.53Ecchi, SchoolUnknownみるタイツ コスプレ撮影 タイツSpecial1Aug 23, 2019Unknown...875.0350.0762.01526.01542.0924.0384.0245.0162.0148.0
113969Thermae Romae x Yoyogi Animation Gakuin Collab...6.29Comedy, Historical, SeinenUnknownテルマエ・ロマエx代々木アニメーション学院企業コラボレーションSpecial1Jul 9, 2012Unknown...35.047.0114.0253.0240.0162.063.029.010.010.0
213459Ribbon-chanUnknownComedyUnknownリボンちゃんTV24Apr 4, 2012 to Mar 27, 2013Spring 2012...7.0UnknownUnknown2.02.04.01.0Unknown2.07.0
315617Jinrui wa Suitai Shimashita Specials7.23Comedy, Fantasy, SeinenHumanity Has Declined Specials人類は衰退しましたSpecial6Sep 19, 2012 to Feb 20, 2013Unknown...451.0885.02432.03038.01388.0588.0130.038.022.019.0
419157Youkai Watch6.54Comedy, Demons, Kids, SupernaturalYo-kai Watch妖怪ウォッチTV214Jan 8, 2014 to Mar 30, 2018Winter 2014...517.0532.01141.01912.01636.01196.0500.0228.0138.0125.0
\n", + "

5 rows × 35 columns

\n", + "
" + ], + "text/plain": [ + " MAL_ID Name Score \\\n", + "0 40176 Miru Tights: Cosplay Satsuei Tights 6.53 \n", + "1 13969 Thermae Romae x Yoyogi Animation Gakuin Collab... 6.29 \n", + "2 13459 Ribbon-chan Unknown \n", + "3 15617 Jinrui wa Suitai Shimashita Specials 7.23 \n", + "4 19157 Youkai Watch 6.54 \n", + "\n", + " Genres English name \\\n", + "0 Ecchi, School Unknown \n", + "1 Comedy, Historical, Seinen Unknown \n", + "2 Comedy Unknown \n", + "3 Comedy, Fantasy, Seinen Humanity Has Declined Specials \n", + "4 Comedy, Demons, Kids, Supernatural Yo-kai Watch \n", + "\n", + " Japanese name Type Episodes \\\n", + "0 みるタイツ コスプレ撮影 タイツ Special 1 \n", + "1 テルマエ・ロマエx代々木アニメーション学院企業コラボレーション Special 1 \n", + "2 リボンちゃん TV 24 \n", + "3 人類は衰退しました Special 6 \n", + "4 妖怪ウォッチ TV 214 \n", + "\n", + " Aired Premiered ... Score-10 Score-9 Score-8 \\\n", + "0 Aug 23, 2019 Unknown ... 875.0 350.0 762.0 \n", + "1 Jul 9, 2012 Unknown ... 35.0 47.0 114.0 \n", + "2 Apr 4, 2012 to Mar 27, 2013 Spring 2012 ... 7.0 Unknown Unknown \n", + "3 Sep 19, 2012 to Feb 20, 2013 Unknown ... 451.0 885.0 2432.0 \n", + "4 Jan 8, 2014 to Mar 30, 2018 Winter 2014 ... 517.0 532.0 1141.0 \n", + "\n", + " Score-7 Score-6 Score-5 Score-4 Score-3 Score-2 Score-1 \n", + "0 1526.0 1542.0 924.0 384.0 245.0 162.0 148.0 \n", + "1 253.0 240.0 162.0 63.0 29.0 10.0 10.0 \n", + "2 2.0 2.0 4.0 1.0 Unknown 2.0 7.0 \n", + "3 3038.0 1388.0 588.0 130.0 38.0 22.0 19.0 \n", + "4 1912.0 1636.0 1196.0 500.0 228.0 138.0 125.0 \n", + "\n", + "[5 rows x 35 columns]" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "anime_df = pd.read_csv(\"anime.csv\")\n", + "anime_df = anime_df.sample(frac=1.0, random_state=42).reset_index(drop=True)\n", + "anime_df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "e2186ae4", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
user_idanime_idrating
0126602181993
11626153903610
225497341242
3360396075
41032260810
\n", + "
" + ], + "text/plain": [ + " user_id anime_id rating\n", + "0 126602 18199 3\n", + "1 162615 39036 10\n", + "2 25497 34124 2\n", + "3 360 39607 5\n", + "4 1032 2608 10" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "rates_df = pd.read_csv(\"rating_complete.csv\")\n", + "rates_df = rates_df.sample(frac=1.0, random_state=42).reset_index(drop=True)\n", + "rates_df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "6c22db06", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "user_id\n", + "68042 4734\n", + "10255 4509\n", + "162615 4474\n", + "189037 4260\n", + "38143 3544\n", + " ... \n", + "125529 1\n", + "310701 1\n", + "287547 1\n", + "182161 1\n", + "44941 1\n", + "Name: count, Length: 27341, dtype: int64\n", + "\n", + "################################################################################\n", + "\n", + "anime_id\n", + "32219 30\n", + "1914 30\n", + "1720 30\n", + "36672 30\n", + "3162 30\n", + " ..\n", + "40959 1\n", + "40674 1\n", + "39685 1\n", + "40594 1\n", + "42144 1\n", + "Name: count, Length: 16872, dtype: int64\n" + ] + } + ], + "source": [ + "print(rates_df['user_id'].value_counts())\n", + "print(\"\\n\" + \"#\" * 80 + \"\\n\")\n", + "print(rates_df['anime_id'].value_counts())" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "93002df5", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
user_idanime_idrating
count447990.000000447990.000000447990.000000
mean48705.90819419412.4196866.258184
std78817.29378814646.0095592.143759
min0.0000001.0000001.000000
25%1578.0000004654.0000005.000000
50%10457.00000017731.0000006.000000
75%61131.25000034122.0000008.000000
max353328.00000048456.00000010.000000
\n", + "
" + ], + "text/plain": [ + " user_id anime_id rating\n", + "count 447990.000000 447990.000000 447990.000000\n", + "mean 48705.908194 19412.419686 6.258184\n", + "std 78817.293788 14646.009559 2.143759\n", + "min 0.000000 1.000000 1.000000\n", + "25% 1578.000000 4654.000000 5.000000\n", + "50% 10457.000000 17731.000000 6.000000\n", + "75% 61131.250000 34122.000000 8.000000\n", + "max 353328.000000 48456.000000 10.000000" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# summarize data\n", + "rates_df.describe() " + ] + }, + { + "cell_type": "markdown", + "id": "565d6f79", + "metadata": {}, + "source": [ + "## Data Cleaning" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "045ff064", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Index(['MAL_ID', 'Name', 'Score', 'Genres', 'English name', 'Japanese name',\n", + " 'Type', 'Episodes', 'Aired', 'Premiered', 'Producers', 'Licensors',\n", + " 'Studios', 'Source', 'Duration', 'Rating', 'Ranked', 'Popularity',\n", + " 'Members', 'Favorites', 'Watching', 'Completed', 'On-Hold', 'Dropped',\n", + " 'Plan to Watch', 'Score-10', 'Score-9', 'Score-8', 'Score-7', 'Score-6',\n", + " 'Score-5', 'Score-4', 'Score-3', 'Score-2', 'Score-1'],\n", + " dtype='object')" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "anime_df.columns" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "678f36ca", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
MAL_IDNameScoreGenresEnglish nameJapanese nameTypeEpisodesAiredPremiered...Score-10Score-9Score-8Score-7Score-6Score-5Score-4Score-3Score-2Score-1
040176Miru Tights: Cosplay Satsuei Tights6.53[Ecchi, School]Unknownみるタイツ コスプレ撮影 タイツSpecial1Aug 23, 2019Unknown...875.0350.0762.01526.01542.0924.0384.0245.0162.0148.0
113969Thermae Romae x Yoyogi Animation Gakuin Collab...6.29[Comedy, Historical, Seinen]Unknownテルマエ・ロマエx代々木アニメーション学院企業コラボレーションSpecial1Jul 9, 2012Unknown...35.047.0114.0253.0240.0162.063.029.010.010.0
213459Ribbon-chanUnknown[Comedy]UnknownリボンちゃんTV24Apr 4, 2012 to Mar 27, 2013Spring 2012...7.0UnknownUnknown2.02.04.01.0Unknown2.07.0
315617Jinrui wa Suitai Shimashita Specials7.23[Comedy, Fantasy, Seinen]Humanity Has Declined Specials人類は衰退しましたSpecial6Sep 19, 2012 to Feb 20, 2013Unknown...451.0885.02432.03038.01388.0588.0130.038.022.019.0
419157Youkai Watch6.54[Comedy, Demons, Kids, Supernatural]Yo-kai Watch妖怪ウォッチTV214Jan 8, 2014 to Mar 30, 2018Winter 2014...517.0532.01141.01912.01636.01196.0500.0228.0138.0125.0
\n", + "

5 rows × 35 columns

\n", + "
" + ], + "text/plain": [ + " MAL_ID Name Score \\\n", + "0 40176 Miru Tights: Cosplay Satsuei Tights 6.53 \n", + "1 13969 Thermae Romae x Yoyogi Animation Gakuin Collab... 6.29 \n", + "2 13459 Ribbon-chan Unknown \n", + "3 15617 Jinrui wa Suitai Shimashita Specials 7.23 \n", + "4 19157 Youkai Watch 6.54 \n", + "\n", + " Genres English name \\\n", + "0 [Ecchi, School] Unknown \n", + "1 [Comedy, Historical, Seinen] Unknown \n", + "2 [Comedy] Unknown \n", + "3 [Comedy, Fantasy, Seinen] Humanity Has Declined Specials \n", + "4 [Comedy, Demons, Kids, Supernatural] Yo-kai Watch \n", + "\n", + " Japanese name Type Episodes \\\n", + "0 みるタイツ コスプレ撮影 タイツ Special 1 \n", + "1 テルマエ・ロマエx代々木アニメーション学院企業コラボレーション Special 1 \n", + "2 リボンちゃん TV 24 \n", + "3 人類は衰退しました Special 6 \n", + "4 妖怪ウォッチ TV 214 \n", + "\n", + " Aired Premiered ... Score-10 Score-9 Score-8 \\\n", + "0 Aug 23, 2019 Unknown ... 875.0 350.0 762.0 \n", + "1 Jul 9, 2012 Unknown ... 35.0 47.0 114.0 \n", + "2 Apr 4, 2012 to Mar 27, 2013 Spring 2012 ... 7.0 Unknown Unknown \n", + "3 Sep 19, 2012 to Feb 20, 2013 Unknown ... 451.0 885.0 2432.0 \n", + "4 Jan 8, 2014 to Mar 30, 2018 Winter 2014 ... 517.0 532.0 1141.0 \n", + "\n", + " Score-7 Score-6 Score-5 Score-4 Score-3 Score-2 Score-1 \n", + "0 1526.0 1542.0 924.0 384.0 245.0 162.0 148.0 \n", + "1 253.0 240.0 162.0 63.0 29.0 10.0 10.0 \n", + "2 2.0 2.0 4.0 1.0 Unknown 2.0 7.0 \n", + "3 3038.0 1388.0 588.0 130.0 38.0 22.0 19.0 \n", + "4 1912.0 1636.0 1196.0 500.0 228.0 138.0 125.0 \n", + "\n", + "[5 rows x 35 columns]" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "anime_df['Genres'] = anime_df.Genres.str.split(',')\n", + "anime_df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "4daa9d0a", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
MAL_IDNameScoreGenresEnglish nameJapanese nameTypeEpisodesAiredPremiered...Super PowerPsychologicalYuriSamuraiMartial ArtsJoseiShoujoSeinenYaoiShounen Ai
040176Miru Tights: Cosplay Satsuei Tights6.53[Ecchi, School]Unknownみるタイツ コスプレ撮影 タイツSpecial1Aug 23, 2019Unknown...0.00.00.00.00.00.00.00.00.00.0
113969Thermae Romae x Yoyogi Animation Gakuin Collab...6.29[Comedy, Historical, Seinen]Unknownテルマエ・ロマエx代々木アニメーション学院企業コラボレーションSpecial1Jul 9, 2012Unknown...0.00.00.00.00.00.00.00.00.00.0
213459Ribbon-chanUnknown[Comedy]UnknownリボンちゃんTV24Apr 4, 2012 to Mar 27, 2013Spring 2012...0.00.00.00.00.00.00.00.00.00.0
315617Jinrui wa Suitai Shimashita Specials7.23[Comedy, Fantasy, Seinen]Humanity Has Declined Specials人類は衰退しましたSpecial6Sep 19, 2012 to Feb 20, 2013Unknown...0.00.00.00.00.00.00.00.00.00.0
419157Youkai Watch6.54[Comedy, Demons, Kids, Supernatural]Yo-kai Watch妖怪ウォッチTV214Jan 8, 2014 to Mar 30, 2018Winter 2014...0.00.00.00.00.00.00.00.00.00.0
..................................................................
1755732238Watashi wa, Kairaku Izonshou6.2[Hentai]Unknown私は、快楽依存症OVA2Feb 26, 2016 to May 20, 2016Unknown...0.00.00.00.00.00.00.00.00.00.0
1755833552Mameshiba Bangai-hen5.75[Music, Comedy]Unknown豆しば番外編Special52008 to Jun 20, 2019Unknown...0.00.00.00.00.00.00.00.00.00.0
175598476Otome Youkai Zakuro7.47[Demons, Historical, Military, Romance, Se...Zakuroおとめ妖怪 ざくろTV13Oct 5, 2010 to Dec 28, 2010Fall 2010...0.00.00.00.00.00.00.00.00.00.0
17560953Jyu Oh Sei7.26[Action, Sci-Fi, Adventure, Mystery, Drama...Jyu-Oh-Sei:Planet of the Beast King獣王星TV11Apr 14, 2006 to Jun 23, 2006Spring 2006...0.00.00.00.00.00.00.00.00.00.0
1756139769Kimi ni Sekai6.7[Sci-Fi, Music, Fantasy]Unknown君に世界Music1Apr 20, 2019Unknown...0.00.00.00.00.00.00.00.00.00.0
\n", + "

17562 rows × 109 columns

\n", + "
" + ], + "text/plain": [ + " MAL_ID Name Score \\\n", + "0 40176 Miru Tights: Cosplay Satsuei Tights 6.53 \n", + "1 13969 Thermae Romae x Yoyogi Animation Gakuin Collab... 6.29 \n", + "2 13459 Ribbon-chan Unknown \n", + "3 15617 Jinrui wa Suitai Shimashita Specials 7.23 \n", + "4 19157 Youkai Watch 6.54 \n", + "... ... ... ... \n", + "17557 32238 Watashi wa, Kairaku Izonshou 6.2 \n", + "17558 33552 Mameshiba Bangai-hen 5.75 \n", + "17559 8476 Otome Youkai Zakuro 7.47 \n", + "17560 953 Jyu Oh Sei 7.26 \n", + "17561 39769 Kimi ni Sekai 6.7 \n", + "\n", + " Genres \\\n", + "0 [Ecchi, School] \n", + "1 [Comedy, Historical, Seinen] \n", + "2 [Comedy] \n", + "3 [Comedy, Fantasy, Seinen] \n", + "4 [Comedy, Demons, Kids, Supernatural] \n", + "... ... \n", + "17557 [Hentai] \n", + "17558 [Music, Comedy] \n", + "17559 [Demons, Historical, Military, Romance, Se... \n", + "17560 [Action, Sci-Fi, Adventure, Mystery, Drama... \n", + "17561 [Sci-Fi, Music, Fantasy] \n", + "\n", + " English name Japanese name \\\n", + "0 Unknown みるタイツ コスプレ撮影 タイツ \n", + "1 Unknown テルマエ・ロマエx代々木アニメーション学院企業コラボレーション \n", + "2 Unknown リボンちゃん \n", + "3 Humanity Has Declined Specials 人類は衰退しました \n", + "4 Yo-kai Watch 妖怪ウォッチ \n", + "... ... ... \n", + "17557 Unknown 私は、快楽依存症 \n", + "17558 Unknown 豆しば番外編 \n", + "17559 Zakuro おとめ妖怪 ざくろ \n", + "17560 Jyu-Oh-Sei:Planet of the Beast King 獣王星 \n", + "17561 Unknown 君に世界 \n", + "\n", + " Type Episodes Aired Premiered ... \\\n", + "0 Special 1 Aug 23, 2019 Unknown ... \n", + "1 Special 1 Jul 9, 2012 Unknown ... \n", + "2 TV 24 Apr 4, 2012 to Mar 27, 2013 Spring 2012 ... \n", + "3 Special 6 Sep 19, 2012 to Feb 20, 2013 Unknown ... \n", + "4 TV 214 Jan 8, 2014 to Mar 30, 2018 Winter 2014 ... \n", + "... ... ... ... ... ... \n", + "17557 OVA 2 Feb 26, 2016 to May 20, 2016 Unknown ... \n", + "17558 Special 5 2008 to Jun 20, 2019 Unknown ... \n", + "17559 TV 13 Oct 5, 2010 to Dec 28, 2010 Fall 2010 ... \n", + "17560 TV 11 Apr 14, 2006 to Jun 23, 2006 Spring 2006 ... 
\n", + "17561 Music 1 Apr 20, 2019 Unknown ... \n", + "\n", + " Super Power Psychological Yuri Samurai Martial Arts Josei Shoujo \\\n", + "0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "1 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "2 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "3 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "4 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "... ... ... ... ... ... ... ... \n", + "17557 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "17558 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "17559 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "17560 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "17561 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "\n", + " Seinen Yaoi Shounen Ai \n", + "0 0.0 0.0 0.0 \n", + "1 0.0 0.0 0.0 \n", + "2 0.0 0.0 0.0 \n", + "3 0.0 0.0 0.0 \n", + "4 0.0 0.0 0.0 \n", + "... ... ... ... \n", + "17557 0.0 0.0 0.0 \n", + "17558 0.0 0.0 0.0 \n", + "17559 0.0 0.0 0.0 \n", + "17560 0.0 0.0 0.0 \n", + "17561 0.0 0.0 0.0 \n", + "\n", + "[17562 rows x 109 columns]" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "animeWithGenres_df = anime_df.drop(columns=['Score-10', 'Score-9', 'Score-8', 'Score-7', 'Score-6', 'Score-5', 'Score-4', 'Score-3', 'Score-2', 'Score-1'])\n", + "\n", + "for index, row in anime_df.iterrows():\n", + " for genre in row['Genres']:\n", + " animeWithGenres_df.at[index, genre] = 1\n", + "\n", + "#Filling in the NaN values with 0 \n", + "animeWithGenres_df = animeWithGenres_df.fillna(0)\n", + "\n", + "\n", + "animeWithGenres_df" + ] + }, + { + "cell_type": "markdown", + "id": "8ec69f8a", + "metadata": {}, + "source": [ + "## Get the input from user" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "c32fa601", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
TitleRating
0ERASED10.0
1Violet Evergarden9.5
2Goblin Slayer6.0
3Berserk8.0
4Attack on Titan7.0
5Tokyo Ghoul6.5
6Orange6.0
7Death Parade8.0
8Death Note7.5
9Bungou Stray Dogs7.5
10Weathering With You8.0
11Your Name8.0
12I want to eat your pancreas8.5
13Princess Mononoke7.5
14Spirited Away7.5
15A Silent Voice8.5
16Ao Haru Ride5.5
17The Girl Who Leapt Through Time7.0
18Another7.5
19Demon Slayer7.0
20Your Lie in April8.0
215 Centimeters per Second6.0
22The Anthem of the Heart7.5
23Evangelion5.0
\n", + "
" + ], + "text/plain": [ + " Title Rating\n", + "0 ERASED 10.0\n", + "1 Violet Evergarden 9.5\n", + "2 Goblin Slayer 6.0\n", + "3 Berserk 8.0\n", + "4 Attack on Titan 7.0\n", + "5 Tokyo Ghoul 6.5\n", + "6 Orange 6.0\n", + "7 Death Parade 8.0\n", + "8 Death Note 7.5\n", + "9 Bungou Stray Dogs 7.5\n", + "10 Weathering With You 8.0\n", + "11 Your Name 8.0\n", + "12 I want to eat your pancreas 8.5\n", + "13 Princess Mononoke 7.5\n", + "14 Spirited Away 7.5\n", + "15 A Silent Voice 8.5\n", + "16 Ao Haru Ride 5.5\n", + "17 The Girl Who Leapt Through Time 7.0\n", + "18 Another 7.5\n", + "19 Demon Slayer 7.0\n", + "20 Your Lie in April 8.0\n", + "21 5 Centimeters per Second 6.0\n", + "22 The Anthem of the Heart 7.5\n", + "23 Evangelion 5.0" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "userInput = [\n", + " {'Title':'ERASED', 'Rating':10},\n", + " {'Title':'Violet Evergarden', 'Rating':9.5},\n", + " {'Title':'Goblin Slayer', 'Rating':6},\n", + " {'Title':\"Berserk\", 'Rating':8},\n", + " {'Title':'Attack on Titan', 'Rating':7},\n", + " {'Title':\"Tokyo Ghoul\", 'Rating':6.5},\n", + " {'Title':\"Orange\", 'Rating':6},\n", + " {'Title':\"Death Parade\", 'Rating':8},\n", + " {'Title':\"Death Note\", 'Rating':7.5},\n", + " {'Title':\"Bungou Stray Dogs\", 'Rating':7.5},\n", + " {'Title':\"Weathering With You\", 'Rating':8},\n", + " {'Title':\"Your Name\", 'Rating':8},\n", + " {'Title':\"I want to eat your pancreas\", 'Rating':8.5},\n", + " {'Title':\"Princess Mononoke\", 'Rating':7.5},\n", + " {'Title':\"Spirited Away\", 'Rating':7.5},\n", + " {'Title':\"A Silent Voice\", 'Rating':8.5},\n", + " {'Title':\"Ao Haru Ride\", 'Rating':5.5},\n", + " {'Title':\"The Girl Who Leapt Through Time\", 'Rating':7},\n", + " {'Title':\"Another\", 'Rating':7.5},\n", + " {'Title':\"Demon Slayer\", 'Rating':7},\n", + " {'Title':\"Your Lie in April\", 'Rating':8},\n", + " {'Title':\"5 Centimeters per Second\", 'Rating':6},\n", + " 
{'Title':\"The Anthem of the Heart\", 'Rating':7.5},\n", + " {'Title':\"Evangelion\", 'Rating':5}\n", + " ] \n", + "inputAnime = pd.DataFrame(userInput)\n", + "inputAnime" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "f125a190", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Index(['MAL_ID', 'Name', 'Score', 'Genres', 'English name', 'Japanese name',\n", + " 'Type', 'Episodes', 'Aired', 'Premiered', 'Producers', 'Licensors',\n", + " 'Studios', 'Source', 'Duration', 'Rating', 'Ranked', 'Popularity',\n", + " 'Members', 'Favorites', 'Watching', 'Completed', 'On-Hold', 'Dropped',\n", + " 'Plan to Watch'],\n", + " dtype='object')" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "animeWithGenres_df.columns[:25]" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "ecc89a81", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
TitleRating
0Boku dake ga Inai Machi10.0
1Violet Evergarden9.5
2Goblin Slayer6.0
3Berserk8.0
4Shingeki no Kyojin7.0
5Tokyo Ghoul6.5
6Orange6.0
7Death Parade8.0
8Death Note7.5
9Bungou Stray Dogs7.5
10Tenki no Ko8.0
11Kimi no Na wa.8.0
12Kimi no Suizou wo Tabetai8.5
13Mononoke Hime7.5
14Sen to Chihiro no Kamikakushi7.5
15Koe no Katachi8.5
16Ao Haru Ride5.5
17Toki wo Kakeru Shoujo7.0
18Another7.5
19Kimetsu no Yaiba7.0
20Shigatsu wa Kimi no Uso8.0
21Byousoku 5 Centimeter6.0
22Kokoro ga Sakebitagatterunda.7.5
23Schick x Evangelion5.0
\n", + "
" + ], + "text/plain": [ + " Title Rating\n", + "0 Boku dake ga Inai Machi 10.0\n", + "1 Violet Evergarden 9.5\n", + "2 Goblin Slayer 6.0\n", + "3 Berserk 8.0\n", + "4 Shingeki no Kyojin 7.0\n", + "5 Tokyo Ghoul 6.5\n", + "6 Orange 6.0\n", + "7 Death Parade 8.0\n", + "8 Death Note 7.5\n", + "9 Bungou Stray Dogs 7.5\n", + "10 Tenki no Ko 8.0\n", + "11 Kimi no Na wa. 8.0\n", + "12 Kimi no Suizou wo Tabetai 8.5\n", + "13 Mononoke Hime 7.5\n", + "14 Sen to Chihiro no Kamikakushi 7.5\n", + "15 Koe no Katachi 8.5\n", + "16 Ao Haru Ride 5.5\n", + "17 Toki wo Kakeru Shoujo 7.0\n", + "18 Another 7.5\n", + "19 Kimetsu no Yaiba 7.0\n", + "20 Shigatsu wa Kimi no Uso 8.0\n", + "21 Byousoku 5 Centimeter 6.0\n", + "22 Kokoro ga Sakebitagatterunda. 7.5\n", + "23 Schick x Evangelion 5.0" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "anime_df['Name_lower'] = anime_df['Name'].str.lower()\n", + "anime_df['English_lower'] = anime_df['English name'].str.lower()\n", + "inputAnime['Title_lower'] = inputAnime['Title'].str.lower()\n", + "\n", + "def find_best_match(Title, anime_df):\n", + " if Title in anime_df['Name_lower'].values:\n", + " return anime_df[anime_df['Name_lower'] == Title]['Name'].values[0]\n", + " elif Title in anime_df['English_lower'].values:\n", + " return anime_df[anime_df['English_lower'] == Title]['Name'].values[0]\n", + " else:\n", + " for idx, row in anime_df.iterrows():\n", + " if Title in row['Name_lower'] or Title in row['English_lower']:\n", + " return row['Name']\n", + " return None\n", + "\n", + "inputAnime['best_match'] = inputAnime['Title_lower'].apply(find_best_match, anime_df=anime_df)\n", + "inputAnime = inputAnime.dropna(subset=['best_match'])\n", + "inputAnime['Title'] = inputAnime['best_match']\n", + "anime_df.drop(['Name_lower', 'English_lower'], axis=1, inplace=True)\n", + "inputAnime.drop(['Title_lower', 'best_match'], axis=1, inplace=True)\n", + "\n", + "inputAnime" + ] + }, + { + 
"cell_type": "code", + "execution_count": 13, + "id": "4add6237", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
MAL_IDTitleRating
0164Mononoke Hime7.5
1199Sen to Chihiro no Kamikakushi7.5
21535Death Note7.5
31689Byousoku 5 Centimeter6.0
42236Toki wo Kakeru Shoujo7.0
511111Another7.5
616498Shingeki no Kyojin7.0
721995Ao Haru Ride5.5
822319Tokyo Ghoul6.5
923273Shigatsu wa Kimi no Uso8.0
1028223Death Parade8.0
1128725Kokoro ga Sakebitagatterunda.7.5
1228851Koe no Katachi8.5
1331043Boku dake ga Inai Machi10.0
1431115Schick x Evangelion5.0
1531478Bungou Stray Dogs7.5
1632281Kimi no Na wa.8.0
1732379Berserk8.0
1832729Orange6.0
1933352Violet Evergarden9.5
2036098Kimi no Suizou wo Tabetai8.5
2137349Goblin Slayer6.0
2238000Kimetsu no Yaiba7.0
2338826Tenki no Ko8.0
\n", + "
" + ], + "text/plain": [ + " MAL_ID Title Rating\n", + "0 164 Mononoke Hime 7.5\n", + "1 199 Sen to Chihiro no Kamikakushi 7.5\n", + "2 1535 Death Note 7.5\n", + "3 1689 Byousoku 5 Centimeter 6.0\n", + "4 2236 Toki wo Kakeru Shoujo 7.0\n", + "5 11111 Another 7.5\n", + "6 16498 Shingeki no Kyojin 7.0\n", + "7 21995 Ao Haru Ride 5.5\n", + "8 22319 Tokyo Ghoul 6.5\n", + "9 23273 Shigatsu wa Kimi no Uso 8.0\n", + "10 28223 Death Parade 8.0\n", + "11 28725 Kokoro ga Sakebitagatterunda. 7.5\n", + "12 28851 Koe no Katachi 8.5\n", + "13 31043 Boku dake ga Inai Machi 10.0\n", + "14 31115 Schick x Evangelion 5.0\n", + "15 31478 Bungou Stray Dogs 7.5\n", + "16 32281 Kimi no Na wa. 8.0\n", + "17 32379 Berserk 8.0\n", + "18 32729 Orange 6.0\n", + "19 33352 Violet Evergarden 9.5\n", + "20 36098 Kimi no Suizou wo Tabetai 8.5\n", + "21 37349 Goblin Slayer 6.0\n", + "22 38000 Kimetsu no Yaiba 7.0\n", + "23 38826 Tenki no Ko 8.0" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "inputAnime = inputAnime.merge(anime_df[['MAL_ID', 'Name']], left_on='Title', right_on='Name', how='left')\n", + "inputAnime = inputAnime[['MAL_ID', 'Title', 'Rating']]\n", + "\n", + "inputAnime = inputAnime.sort_values(by='MAL_ID')\n", + "inputAnime = inputAnime.reset_index(drop=True)\n", + "\n", + "inputAnime" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "d1889753", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
MAL_IDNameScoreGenresEnglish nameJapanese nameTypeEpisodesAiredPremiered...Super PowerPsychologicalYuriSamuraiMartial ArtsJoseiShoujoSeinenYaoiShounen Ai
12056164Mononoke Hime8.72[Action, Adventure, Fantasy]Princess Mononokeもののけ姫Movie1Jul 12, 1997Unknown...0.00.00.00.00.00.00.00.00.00.0
2741199Sen to Chihiro no Kamikakushi8.83[Adventure, Supernatural, Drama]Spirited Away千と千尋の神隠しMovie1Jul 20, 2001Unknown...0.00.00.00.00.00.00.00.00.00.0
20521535Death Note8.63[Mystery, Police, Psychological, Supernatur...Death NoteデスノートTV37Oct 4, 2006 to Jun 27, 2007Fall 2006...0.00.00.00.00.00.00.00.00.00.0
21401689Byousoku 5 Centimeter7.73[Drama, Romance, Slice of Life]5 Centimeters Per Second秒速5センチメートルMovie3Mar 3, 2007Unknown...0.00.00.00.00.00.00.00.00.00.0
173712236Toki wo Kakeru Shoujo8.2[Adventure, Drama, Romance, Sci-Fi]The Girl Who Leapt Through Time時をかける少女Movie1Jul 15, 2006Unknown...0.00.00.00.00.00.00.00.00.00.0
516411111Another7.55[Mystery, Horror, Supernatural, Thriller, ...AnotherアナザーTV12Jan 10, 2012 to Mar 27, 2012Winter 2012...0.00.00.00.00.00.00.00.00.00.0
393616498Shingeki no Kyojin8.48[Action, Military, Mystery, Super Power, D...Attack on Titan進撃の巨人TV25Apr 7, 2013 to Sep 29, 2013Spring 2013...0.00.00.00.00.00.00.00.00.00.0
1670421995Ao Haru Ride7.67[Comedy, Drama, Romance, School, Shoujo, ...Blue Spring RideアオハライドTV12Jul 8, 2014 to Sep 23, 2014Summer 2014...0.00.00.00.00.00.00.00.00.00.0
1658422319Tokyo Ghoul7.81[Action, Mystery, Horror, Psychological, S...Tokyo Ghoul東京喰種-トーキョーグール-TV12Jul 4, 2014 to Sep 19, 2014Summer 2014...0.00.00.00.00.00.00.00.00.00.0
475223273Shigatsu wa Kimi no Uso8.74[Drama, Music, Romance, School, Shounen]Your Lie in April四月は君の嘘TV22Oct 10, 2014 to Mar 20, 2015Fall 2014...0.00.00.00.00.00.00.00.00.00.0
704528223Death Parade8.2[Game, Mystery, Psychological, Drama, Thri...Death Paradeデス・パレードTV12Jan 10, 2015 to Mar 28, 2015Winter 2015...0.00.00.00.00.00.00.00.00.00.0
764028725Kokoro ga Sakebitagatterunda.7.96[Drama, Romance, School]The Anthem of the Heart心が叫びたがってるんだ。Movie1Sep 19, 2015Unknown...0.00.00.00.00.00.00.00.00.00.0
1141928851Koe no Katachi9.0[Drama, School, Shounen]A Silent Voice聲の形Movie1Sep 17, 2016Unknown...0.00.00.00.00.00.00.00.00.00.0
429331043Boku dake ga Inai Machi8.37[Mystery, Psychological, Supernatural, Seinen]ERASED僕だけがいない街TV12Jan 8, 2016 to Mar 25, 2016Winter 2016...0.00.00.00.00.00.00.00.00.00.0
53731115Schick x Evangelion6.06[Comedy, Parody]UnknownSchick × エヴァンゲリオンSpecial2May 11, 2015Unknown...0.00.00.00.00.00.00.00.00.00.0
910231478Bungou Stray Dogs7.79[Action, Comedy, Mystery, Seinen, Super Po...Bungo Stray Dogs文豪ストレイドッグスTV12Apr 7, 2016 to Jun 23, 2016Spring 2016...0.00.00.00.00.00.00.00.00.00.0
1352032281Kimi no Na wa.8.96[Romance, Supernatural, School, Drama]Your Name.君の名は。Movie1Aug 26, 2016Unknown...0.00.00.00.00.00.00.00.00.00.0
184432379Berserk6.39[Action, Adventure, Demons, Drama, Fantasy...BerserkベルセルクTV12Jul 1, 2016 to Sep 16, 2016Summer 2016...0.00.00.00.00.00.00.00.00.00.0
517832729Orange7.62[Sci-Fi, Drama, Romance, School, Shoujo]Orangeorange(オレンジ)TV13Jul 4, 2016 to Sep 26, 2016Summer 2016...0.00.00.00.00.00.00.00.00.00.0
193333352Violet Evergarden8.64[Slice of Life, Drama, Fantasy]Violet Evergardenヴァイオレット・エヴァーガーデンTV13Jan 11, 2018 to Apr 5, 2018Winter 2018...0.00.00.00.00.00.00.00.00.00.0
1499736098Kimi no Suizou wo Tabetai8.59[Drama]I want to eat your pancreas君の膵臓をたべたいMovie1Sep 1, 2018Unknown...0.00.00.00.00.00.00.00.00.00.0
944437349Goblin Slayer7.46[Action, Adventure, Fantasy]Goblin SlayerゴブリンスレイヤーTV12Oct 7, 2018 to Dec 30, 2018Fall 2018...0.00.00.00.00.00.00.00.00.00.0
1290038000Kimetsu no Yaiba8.62[Action, Demons, Historical, Shounen, Supe...Demon Slayer:Kimetsu no Yaiba鬼滅の刃TV26Apr 6, 2019 to Sep 28, 2019Spring 2019...0.00.00.00.00.00.00.00.00.00.0
1702338826Tenki no Ko8.41[Slice of Life, Drama, Romance, Fantasy]Weathering With You天気の子Movie1Jul 19, 2019Unknown...0.00.00.00.00.00.00.00.00.00.0
\n", + "

24 rows × 109 columns

\n", + "
" + ], + "text/plain": [ + " MAL_ID Name Score \\\n", + "12056 164 Mononoke Hime 8.72 \n", + "2741 199 Sen to Chihiro no Kamikakushi 8.83 \n", + "2052 1535 Death Note 8.63 \n", + "2140 1689 Byousoku 5 Centimeter 7.73 \n", + "17371 2236 Toki wo Kakeru Shoujo 8.2 \n", + "5164 11111 Another 7.55 \n", + "3936 16498 Shingeki no Kyojin 8.48 \n", + "16704 21995 Ao Haru Ride 7.67 \n", + "16584 22319 Tokyo Ghoul 7.81 \n", + "4752 23273 Shigatsu wa Kimi no Uso 8.74 \n", + "7045 28223 Death Parade 8.2 \n", + "7640 28725 Kokoro ga Sakebitagatterunda. 7.96 \n", + "11419 28851 Koe no Katachi 9.0 \n", + "4293 31043 Boku dake ga Inai Machi 8.37 \n", + "537 31115 Schick x Evangelion 6.06 \n", + "9102 31478 Bungou Stray Dogs 7.79 \n", + "13520 32281 Kimi no Na wa. 8.96 \n", + "1844 32379 Berserk 6.39 \n", + "5178 32729 Orange 7.62 \n", + "1933 33352 Violet Evergarden 8.64 \n", + "14997 36098 Kimi no Suizou wo Tabetai 8.59 \n", + "9444 37349 Goblin Slayer 7.46 \n", + "12900 38000 Kimetsu no Yaiba 8.62 \n", + "17023 38826 Tenki no Ko 8.41 \n", + "\n", + " Genres \\\n", + "12056 [Action, Adventure, Fantasy] \n", + "2741 [Adventure, Supernatural, Drama] \n", + "2052 [Mystery, Police, Psychological, Supernatur... \n", + "2140 [Drama, Romance, Slice of Life] \n", + "17371 [Adventure, Drama, Romance, Sci-Fi] \n", + "5164 [Mystery, Horror, Supernatural, Thriller, ... \n", + "3936 [Action, Military, Mystery, Super Power, D... \n", + "16704 [Comedy, Drama, Romance, School, Shoujo, ... \n", + "16584 [Action, Mystery, Horror, Psychological, S... \n", + "4752 [Drama, Music, Romance, School, Shounen] \n", + "7045 [Game, Mystery, Psychological, Drama, Thri... \n", + "7640 [Drama, Romance, School] \n", + "11419 [Drama, School, Shounen] \n", + "4293 [Mystery, Psychological, Supernatural, Seinen] \n", + "537 [Comedy, Parody] \n", + "9102 [Action, Comedy, Mystery, Seinen, Super Po... \n", + "13520 [Romance, Supernatural, School, Drama] \n", + "1844 [Action, Adventure, Demons, Drama, Fantasy... 
\n", + "5178 [Sci-Fi, Drama, Romance, School, Shoujo] \n", + "1933 [Slice of Life, Drama, Fantasy] \n", + "14997 [Drama] \n", + "9444 [Action, Adventure, Fantasy] \n", + "12900 [Action, Demons, Historical, Shounen, Supe... \n", + "17023 [Slice of Life, Drama, Romance, Fantasy] \n", + "\n", + " English name Japanese name Type Episodes \\\n", + "12056 Princess Mononoke もののけ姫 Movie 1 \n", + "2741 Spirited Away 千と千尋の神隠し Movie 1 \n", + "2052 Death Note デスノート TV 37 \n", + "2140 5 Centimeters Per Second 秒速5センチメートル Movie 3 \n", + "17371 The Girl Who Leapt Through Time 時をかける少女 Movie 1 \n", + "5164 Another アナザー TV 12 \n", + "3936 Attack on Titan 進撃の巨人 TV 25 \n", + "16704 Blue Spring Ride アオハライド TV 12 \n", + "16584 Tokyo Ghoul 東京喰種-トーキョーグール- TV 12 \n", + "4752 Your Lie in April 四月は君の嘘 TV 22 \n", + "7045 Death Parade デス・パレード TV 12 \n", + "7640 The Anthem of the Heart 心が叫びたがってるんだ。 Movie 1 \n", + "11419 A Silent Voice 聲の形 Movie 1 \n", + "4293 ERASED 僕だけがいない街 TV 12 \n", + "537 Unknown Schick × エヴァンゲリオン Special 2 \n", + "9102 Bungo Stray Dogs 文豪ストレイドッグス TV 12 \n", + "13520 Your Name. 君の名は。 Movie 1 \n", + "1844 Berserk ベルセルク TV 12 \n", + "5178 Orange orange(オレンジ) TV 13 \n", + "1933 Violet Evergarden ヴァイオレット・エヴァーガーデン TV 13 \n", + "14997 I want to eat your pancreas 君の膵臓をたべたい Movie 1 \n", + "9444 Goblin Slayer ゴブリンスレイヤー TV 12 \n", + "12900 Demon Slayer:Kimetsu no Yaiba 鬼滅の刃 TV 26 \n", + "17023 Weathering With You 天気の子 Movie 1 \n", + "\n", + " Aired Premiered ... Super Power \\\n", + "12056 Jul 12, 1997 Unknown ... 0.0 \n", + "2741 Jul 20, 2001 Unknown ... 0.0 \n", + "2052 Oct 4, 2006 to Jun 27, 2007 Fall 2006 ... 0.0 \n", + "2140 Mar 3, 2007 Unknown ... 0.0 \n", + "17371 Jul 15, 2006 Unknown ... 0.0 \n", + "5164 Jan 10, 2012 to Mar 27, 2012 Winter 2012 ... 0.0 \n", + "3936 Apr 7, 2013 to Sep 29, 2013 Spring 2013 ... 0.0 \n", + "16704 Jul 8, 2014 to Sep 23, 2014 Summer 2014 ... 0.0 \n", + "16584 Jul 4, 2014 to Sep 19, 2014 Summer 2014 ... 
0.0 \n", + "4752 Oct 10, 2014 to Mar 20, 2015 Fall 2014 ... 0.0 \n", + "7045 Jan 10, 2015 to Mar 28, 2015 Winter 2015 ... 0.0 \n", + "7640 Sep 19, 2015 Unknown ... 0.0 \n", + "11419 Sep 17, 2016 Unknown ... 0.0 \n", + "4293 Jan 8, 2016 to Mar 25, 2016 Winter 2016 ... 0.0 \n", + "537 May 11, 2015 Unknown ... 0.0 \n", + "9102 Apr 7, 2016 to Jun 23, 2016 Spring 2016 ... 0.0 \n", + "13520 Aug 26, 2016 Unknown ... 0.0 \n", + "1844 Jul 1, 2016 to Sep 16, 2016 Summer 2016 ... 0.0 \n", + "5178 Jul 4, 2016 to Sep 26, 2016 Summer 2016 ... 0.0 \n", + "1933 Jan 11, 2018 to Apr 5, 2018 Winter 2018 ... 0.0 \n", + "14997 Sep 1, 2018 Unknown ... 0.0 \n", + "9444 Oct 7, 2018 to Dec 30, 2018 Fall 2018 ... 0.0 \n", + "12900 Apr 6, 2019 to Sep 28, 2019 Spring 2019 ... 0.0 \n", + "17023 Jul 19, 2019 Unknown ... 0.0 \n", + "\n", + " Psychological Yuri Samurai Martial Arts Josei Shoujo Seinen Yaoi \\\n", + "12056 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "2741 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "2052 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "2140 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "17371 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "5164 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "3936 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "16704 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "16584 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "4752 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "7045 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "7640 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "11419 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "4293 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "537 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "9102 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "13520 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "1844 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "5178 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "1933 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "14997 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "9444 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "12900 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "17023 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "\n", + " 
Shounen Ai \n", + "12056 0.0 \n", + "2741 0.0 \n", + "2052 0.0 \n", + "2140 0.0 \n", + "17371 0.0 \n", + "5164 0.0 \n", + "3936 0.0 \n", + "16704 0.0 \n", + "16584 0.0 \n", + "4752 0.0 \n", + "7045 0.0 \n", + "7640 0.0 \n", + "11419 0.0 \n", + "4293 0.0 \n", + "537 0.0 \n", + "9102 0.0 \n", + "13520 0.0 \n", + "1844 0.0 \n", + "5178 0.0 \n", + "1933 0.0 \n", + "14997 0.0 \n", + "9444 0.0 \n", + "12900 0.0 \n", + "17023 0.0 \n", + "\n", + "[24 rows x 109 columns]" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "userAnimes = animeWithGenres_df[animeWithGenres_df['MAL_ID'].isin(inputAnime['MAL_ID'].tolist())]\n", + "userAnimes = userAnimes.sort_values(by='MAL_ID')\n", + "userAnimes" + ] + }, + { + "cell_type": "markdown", + "id": "052513c8", + "metadata": {}, + "source": [ + "### Remove the anime's that the user has seen from the whole list." + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "f19ea7da", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
MAL_IDNameScoreGenresEnglish nameJapanese nameTypeEpisodesAiredPremiered...Super PowerPsychologicalYuriSamuraiMartial ArtsJoseiShoujoSeinenYaoiShounen Ai
040176Miru Tights: Cosplay Satsuei Tights6.53[Ecchi, School]Unknownみるタイツ コスプレ撮影 タイツSpecial1Aug 23, 2019Unknown...0.00.00.00.00.00.00.00.00.00.0
113969Thermae Romae x Yoyogi Animation Gakuin Collab...6.29[Comedy, Historical, Seinen]Unknownテルマエ・ロマエx代々木アニメーション学院企業コラボレーションSpecial1Jul 9, 2012Unknown...0.00.00.00.00.00.00.00.00.00.0
213459Ribbon-chanUnknown[Comedy]UnknownリボンちゃんTV24Apr 4, 2012 to Mar 27, 2013Spring 2012...0.00.00.00.00.00.00.00.00.00.0
315617Jinrui wa Suitai Shimashita Specials7.23[Comedy, Fantasy, Seinen]Humanity Has Declined Specials人類は衰退しましたSpecial6Sep 19, 2012 to Feb 20, 2013Unknown...0.00.00.00.00.00.00.00.00.00.0
419157Youkai Watch6.54[Comedy, Demons, Kids, Supernatural]Yo-kai Watch妖怪ウォッチTV214Jan 8, 2014 to Mar 30, 2018Winter 2014...0.00.00.00.00.00.00.00.00.00.0
..................................................................
1755732238Watashi wa, Kairaku Izonshou6.2[Hentai]Unknown私は、快楽依存症OVA2Feb 26, 2016 to May 20, 2016Unknown...0.00.00.00.00.00.00.00.00.00.0
1755833552Mameshiba Bangai-hen5.75[Music, Comedy]Unknown豆しば番外編Special52008 to Jun 20, 2019Unknown...0.00.00.00.00.00.00.00.00.00.0
175598476Otome Youkai Zakuro7.47[Demons, Historical, Military, Romance, Se...Zakuroおとめ妖怪 ざくろTV13Oct 5, 2010 to Dec 28, 2010Fall 2010...0.00.00.00.00.00.00.00.00.00.0
17560953Jyu Oh Sei7.26[Action, Sci-Fi, Adventure, Mystery, Drama...Jyu-Oh-Sei:Planet of the Beast King獣王星TV11Apr 14, 2006 to Jun 23, 2006Spring 2006...0.00.00.00.00.00.00.00.00.00.0
1756139769Kimi ni Sekai6.7[Sci-Fi, Music, Fantasy]Unknown君に世界Music1Apr 20, 2019Unknown...0.00.00.00.00.00.00.00.00.00.0
\n", + "

17538 rows × 109 columns

\n", + "
" + ], + "text/plain": [ + " MAL_ID Name Score \\\n", + "0 40176 Miru Tights: Cosplay Satsuei Tights 6.53 \n", + "1 13969 Thermae Romae x Yoyogi Animation Gakuin Collab... 6.29 \n", + "2 13459 Ribbon-chan Unknown \n", + "3 15617 Jinrui wa Suitai Shimashita Specials 7.23 \n", + "4 19157 Youkai Watch 6.54 \n", + "... ... ... ... \n", + "17557 32238 Watashi wa, Kairaku Izonshou 6.2 \n", + "17558 33552 Mameshiba Bangai-hen 5.75 \n", + "17559 8476 Otome Youkai Zakuro 7.47 \n", + "17560 953 Jyu Oh Sei 7.26 \n", + "17561 39769 Kimi ni Sekai 6.7 \n", + "\n", + " Genres \\\n", + "0 [Ecchi, School] \n", + "1 [Comedy, Historical, Seinen] \n", + "2 [Comedy] \n", + "3 [Comedy, Fantasy, Seinen] \n", + "4 [Comedy, Demons, Kids, Supernatural] \n", + "... ... \n", + "17557 [Hentai] \n", + "17558 [Music, Comedy] \n", + "17559 [Demons, Historical, Military, Romance, Se... \n", + "17560 [Action, Sci-Fi, Adventure, Mystery, Drama... \n", + "17561 [Sci-Fi, Music, Fantasy] \n", + "\n", + " English name Japanese name \\\n", + "0 Unknown みるタイツ コスプレ撮影 タイツ \n", + "1 Unknown テルマエ・ロマエx代々木アニメーション学院企業コラボレーション \n", + "2 Unknown リボンちゃん \n", + "3 Humanity Has Declined Specials 人類は衰退しました \n", + "4 Yo-kai Watch 妖怪ウォッチ \n", + "... ... ... \n", + "17557 Unknown 私は、快楽依存症 \n", + "17558 Unknown 豆しば番外編 \n", + "17559 Zakuro おとめ妖怪 ざくろ \n", + "17560 Jyu-Oh-Sei:Planet of the Beast King 獣王星 \n", + "17561 Unknown 君に世界 \n", + "\n", + " Type Episodes Aired Premiered ... \\\n", + "0 Special 1 Aug 23, 2019 Unknown ... \n", + "1 Special 1 Jul 9, 2012 Unknown ... \n", + "2 TV 24 Apr 4, 2012 to Mar 27, 2013 Spring 2012 ... \n", + "3 Special 6 Sep 19, 2012 to Feb 20, 2013 Unknown ... \n", + "4 TV 214 Jan 8, 2014 to Mar 30, 2018 Winter 2014 ... \n", + "... ... ... ... ... ... \n", + "17557 OVA 2 Feb 26, 2016 to May 20, 2016 Unknown ... \n", + "17558 Special 5 2008 to Jun 20, 2019 Unknown ... \n", + "17559 TV 13 Oct 5, 2010 to Dec 28, 2010 Fall 2010 ... \n", + "17560 TV 11 Apr 14, 2006 to Jun 23, 2006 Spring 2006 ... 
\n", + "17561 Music 1 Apr 20, 2019 Unknown ... \n", + "\n", + " Super Power Psychological Yuri Samurai Martial Arts Josei Shoujo \\\n", + "0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "1 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "2 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "3 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "4 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "... ... ... ... ... ... ... ... \n", + "17557 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "17558 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "17559 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "17560 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "17561 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "\n", + " Seinen Yaoi Shounen Ai \n", + "0 0.0 0.0 0.0 \n", + "1 0.0 0.0 0.0 \n", + "2 0.0 0.0 0.0 \n", + "3 0.0 0.0 0.0 \n", + "4 0.0 0.0 0.0 \n", + "... ... ... ... \n", + "17557 0.0 0.0 0.0 \n", + "17558 0.0 0.0 0.0 \n", + "17559 0.0 0.0 0.0 \n", + "17560 0.0 0.0 0.0 \n", + "17561 0.0 0.0 0.0 \n", + "\n", + "[17538 rows x 109 columns]" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "animeWithGenres_df = animeWithGenres_df[~animeWithGenres_df.isin(userAnimes).all(axis=1)]\n", + "animeWithGenres_df" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "c3b473d0", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
EcchiSchoolComedyHistoricalSeinenFantasyDemonsKidsSupernaturalSlice of Life...Super PowerPsychologicalYuriSamuraiMartial ArtsJoseiShoujoSeinenYaoiShounen Ai
00.00.00.00.00.01.00.00.00.00.0...0.00.00.00.00.00.00.00.00.00.0
10.00.00.00.00.00.00.00.01.00.0...0.00.00.00.00.00.00.00.00.00.0
20.00.00.00.00.00.00.00.01.00.0...0.00.00.00.00.00.00.00.00.00.0
30.00.00.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.00.0
40.00.00.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.00.0
50.01.00.00.00.00.00.00.01.00.0...0.00.00.00.00.00.00.00.00.00.0
60.00.00.00.00.01.00.00.00.00.0...0.00.00.00.00.00.00.00.00.00.0
70.01.01.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.00.0
80.00.00.00.01.00.00.00.01.00.0...0.00.00.00.00.00.00.00.00.00.0
90.01.00.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.00.0
100.00.00.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.00.0
110.01.00.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.00.0
120.01.00.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.00.0
130.00.00.00.01.00.00.00.01.00.0...0.00.00.00.00.00.00.00.00.00.0
140.00.01.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.00.0
150.00.00.00.01.00.00.00.01.00.0...0.00.00.00.00.00.00.00.00.00.0
160.01.00.00.00.00.00.00.01.00.0...0.00.00.00.00.00.00.00.00.00.0
170.00.00.00.01.01.01.00.01.00.0...0.00.00.00.00.00.00.00.00.00.0
180.01.00.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.00.0
190.00.00.00.00.01.00.00.00.01.0...0.00.00.00.00.00.00.00.00.00.0
200.00.00.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.00.0
210.00.00.00.00.01.00.00.00.00.0...0.00.00.00.00.00.00.00.00.00.0
220.00.00.01.00.00.01.00.01.00.0...0.00.00.00.00.00.00.00.00.00.0
230.00.00.00.00.01.00.00.00.01.0...0.00.00.00.00.00.00.00.00.00.0
\n", + "

24 rows × 84 columns

\n", + "
" + ], + "text/plain": [ + " Ecchi School Comedy Historical Seinen Fantasy Demons Kids \\\n", + "0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 \n", + "1 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "2 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "3 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "4 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "5 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "6 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 \n", + "7 0.0 1.0 1.0 0.0 0.0 0.0 0.0 0.0 \n", + "8 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 \n", + "9 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "10 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "11 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "12 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "13 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 \n", + "14 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 \n", + "15 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 \n", + "16 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "17 0.0 0.0 0.0 0.0 1.0 1.0 1.0 0.0 \n", + "18 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "19 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 \n", + "20 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "21 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 \n", + "22 0.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 \n", + "23 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 \n", + "\n", + " Supernatural Slice of Life ... Super Power Psychological Yuri \\\n", + "0 0.0 0.0 ... 0.0 0.0 0.0 \n", + "1 1.0 0.0 ... 0.0 0.0 0.0 \n", + "2 1.0 0.0 ... 0.0 0.0 0.0 \n", + "3 0.0 0.0 ... 0.0 0.0 0.0 \n", + "4 0.0 0.0 ... 0.0 0.0 0.0 \n", + "5 1.0 0.0 ... 0.0 0.0 0.0 \n", + "6 0.0 0.0 ... 0.0 0.0 0.0 \n", + "7 0.0 0.0 ... 0.0 0.0 0.0 \n", + "8 1.0 0.0 ... 0.0 0.0 0.0 \n", + "9 0.0 0.0 ... 0.0 0.0 0.0 \n", + "10 0.0 0.0 ... 0.0 0.0 0.0 \n", + "11 0.0 0.0 ... 0.0 0.0 0.0 \n", + "12 0.0 0.0 ... 0.0 0.0 0.0 \n", + "13 1.0 0.0 ... 0.0 0.0 0.0 \n", + "14 0.0 0.0 ... 0.0 0.0 0.0 \n", + "15 1.0 0.0 ... 0.0 0.0 0.0 \n", + "16 1.0 0.0 ... 0.0 0.0 0.0 \n", + "17 1.0 0.0 ... 0.0 0.0 0.0 \n", + "18 0.0 0.0 ... 0.0 0.0 0.0 \n", + "19 0.0 1.0 ... 0.0 0.0 0.0 \n", + "20 0.0 0.0 ... 0.0 0.0 0.0 \n", + "21 0.0 0.0 ... 0.0 0.0 0.0 \n", + "22 1.0 0.0 ... 
0.0 0.0 0.0 \n", + "23 0.0 1.0 ... 0.0 0.0 0.0 \n", + "\n", + " Samurai Martial Arts Josei Shoujo Seinen Yaoi Shounen Ai \n", + "0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "1 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "2 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "3 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "4 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "5 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "6 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "7 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "8 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "9 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "10 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "11 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "12 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "13 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "14 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "15 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "16 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "17 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "18 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "19 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "20 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "21 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "22 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "23 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "\n", + "[24 rows x 84 columns]" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "userAnimes = userAnimes.reset_index(drop=True)\n", + "\n", + "userGenreTable = userAnimes.iloc[:, 25:]\n", + "userGenreTable" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "e87cf59f", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 7.5\n", + "1 7.5\n", + "2 7.5\n", + "3 6.0\n", + "4 7.0\n", + "5 7.5\n", + "6 7.0\n", + "7 5.5\n", + "8 6.5\n", + "9 8.0\n", + "10 8.0\n", + "11 7.5\n", + "12 8.5\n", + "13 10.0\n", + "14 5.0\n", + "15 7.5\n", + "16 8.0\n", + "17 8.0\n", + "18 6.0\n", + "19 9.5\n", + "20 8.5\n", + "21 6.0\n", + "22 7.0\n", + "23 8.0\n", + "Name: Rating, dtype: float64" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "inputAnime.Rating" + ] + }, + { + "cell_type": "code", 
+ "execution_count": 18, + "id": "68b12d58", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Ecchi 0.0\n", + " School 51.0\n", + "Comedy 10.5\n", + " Historical 7.0\n", + " Seinen 32.0\n", + " ... \n", + "Josei 0.0\n", + "Shoujo 0.0\n", + "Seinen 0.0\n", + "Yaoi 0.0\n", + "Shounen Ai 0.0\n", + "Length: 84, dtype: float64" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "userProfile = userGenreTable.transpose().dot(inputAnime['Rating'])\n", + "userProfile" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "16c0f28a", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
EcchiSchoolComedyHistoricalSeinenFantasyDemonsKidsSupernaturalSlice of Life...Super PowerPsychologicalYuriSamuraiMartial ArtsJoseiShoujoSeinenYaoiShounen Ai
MAL_ID
401761.01.00.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.00.0
139690.00.01.01.01.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.00.0
134590.00.01.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.00.0
156170.00.01.00.01.01.00.00.00.00.0...0.00.00.00.00.00.00.00.00.00.0
191570.00.01.00.00.00.01.01.01.00.0...0.00.00.00.00.00.00.00.00.00.0
\n", + "

5 rows × 84 columns

\n", + "
" + ], + "text/plain": [ + " Ecchi School Comedy Historical Seinen Fantasy Demons \\\n", + "MAL_ID \n", + "40176 1.0 1.0 0.0 0.0 0.0 0.0 0.0 \n", + "13969 0.0 0.0 1.0 1.0 1.0 0.0 0.0 \n", + "13459 0.0 0.0 1.0 0.0 0.0 0.0 0.0 \n", + "15617 0.0 0.0 1.0 0.0 1.0 1.0 0.0 \n", + "19157 0.0 0.0 1.0 0.0 0.0 0.0 1.0 \n", + "\n", + " Kids Supernatural Slice of Life ... Super Power Psychological \\\n", + "MAL_ID ... \n", + "40176 0.0 0.0 0.0 ... 0.0 0.0 \n", + "13969 0.0 0.0 0.0 ... 0.0 0.0 \n", + "13459 0.0 0.0 0.0 ... 0.0 0.0 \n", + "15617 0.0 0.0 0.0 ... 0.0 0.0 \n", + "19157 1.0 1.0 0.0 ... 0.0 0.0 \n", + "\n", + " Yuri Samurai Martial Arts Josei Shoujo Seinen Yaoi Shounen Ai \n", + "MAL_ID \n", + "40176 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "13969 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "13459 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "15617 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "19157 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "\n", + "[5 rows x 84 columns]" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "genreTable = animeWithGenres_df.set_index(animeWithGenres_df['MAL_ID'])\n", + "genreTable = genreTable.iloc[:, 25:]\n", + "genreTable.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "id": "7f29b595", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(17538, 84)" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "genreTable.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "id": "3c1f716d", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "MAL_ID\n", + "40176 0.064721\n", + "13969 0.062817\n", + "13459 0.013325\n", + "15617 0.112310\n", + "19157 0.120558\n", + "dtype: float64" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "recommendationTable_df = 
((genreTable*userProfile).sum(axis=1))/(userProfile.sum())\n", + "recommendationTable_df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "id": "a042aaa0", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "MAL_ID\n", + "35009 0.517132\n", + "33 0.517132\n", + "449 0.496193\n", + "450 0.496193\n", + "451 0.496193\n", + "dtype: float64" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "recommendationTable_df = recommendationTable_df.sort_values(ascending=False)\n", + "recommendationTable_df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "id": "1ed6d547", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
MAL_IDNameScoreGenresEnglish nameJapanese nameTypeEpisodesAiredPremiered...Score-10Score-9Score-8Score-7Score-6Score-5Score-4Score-3Score-2Score-1
035009Berserk Recap6.02[Action, Adventure, Demons, Drama, Fantasy...Unknownベルセルク 第1期ダイジェスト映像Special1Mar 3, 2017Unknown...373.0212.0433.0797.01019.0663.0293.0183.0140.0231.0
133Kenpuu Denki Berserk8.49[Action, Adventure, Demons, Drama, Fantasy...Berserk剣風伝奇ベルセルクTV25Oct 8, 1997 to Apr 1, 1998Fall 1997...58627.065906.060815.029055.09477.03899.01748.0671.0456.0842.0
2449InuYasha Movie 4: Guren no Houraijima7.54[Action, Adventure, Comedy, Historical, De...InuYasha the Movie 4:Fire on the Mystic Island犬夜叉 紅蓮の蓬莱島Movie1Dec 23, 2004Unknown...5230.06127.09865.011837.05135.02190.0671.0225.092.073.0
3450InuYasha Movie 2: Kagami no Naka no Mugenjo7.66[Action, Adventure, Comedy, Historical, De...InuYasha the Movie 2:The Castle Beyond the Loo...犬夜叉 鏡の中の夢幻城Movie1Dec 21, 2002Unknown...6722.07566.011990.012862.05409.02184.0607.0206.096.071.0
4451InuYasha Movie 3: Tenka Hadou no Ken7.8[Action, Adventure, Comedy, Historical, De...InuYasha the Movie 3:Swords of an Honorable Ruler犬夜叉 天下覇道の剣Movie1Dec 20, 2003Unknown...6718.07647.011985.011322.04397.01687.0395.0160.060.066.0
5452InuYasha Movie 1: Toki wo Koeru Omoi7.56[Action, Adventure, Comedy, Historical, De...InuYasha the Movie:Affections Touching Across ...犬夜叉 時代を越える想いMovie1Dec 22, 2001Unknown...6033.06802.011048.013002.05767.02369.0620.0255.0104.093.0
6969Tsubasa Chronicle 2nd Season7.6[Action, Adventure, Fantasy, Romance, Supe...Tsubasa RESERVoir CHRoNiCLE Season Twoツバサ・クロニクル 第2シリーズTV26Apr 29, 2006 to Nov 4, 2006Spring 2006...6346.08909.014525.013920.06024.02757.01101.0401.0215.0139.0
74938Tsubasa: Shunraiki8.13[Action, Adventure, Mystery, Supernatural, ...Tsubasa RESERVoir CHRoNiCLE:Spring Thunder Chr...ツバサ 春雷記OVA2Mar 17, 2009 to May 15, 2009Unknown...4738.06561.08069.05065.01749.0691.0186.074.032.062.0
834055Berserk 2nd Season6.69[Action, Adventure, Demons, Drama, Fantasy...Berserk:Season IIベルセルクTV12Apr 7, 2017 to Jun 23, 2017Spring 2017...5577.06876.012904.015677.09697.05290.04305.02697.02122.02798.0
92983Digital Devil Story: Megami Tensei5.21[Adventure, Mystery, Horror, Demons, Psych...Unknownデジタル・デビル物語〈ストーリ〉 女神転生OVA1Mar 25, 1987Unknown...60.062.0170.0378.0609.0634.0452.0271.0165.083.0
\n", + "

10 rows × 35 columns

\n", + "
" + ], + "text/plain": [ + " MAL_ID Name Score \\\n", + "0 35009 Berserk Recap 6.02 \n", + "1 33 Kenpuu Denki Berserk 8.49 \n", + "2 449 InuYasha Movie 4: Guren no Houraijima 7.54 \n", + "3 450 InuYasha Movie 2: Kagami no Naka no Mugenjo 7.66 \n", + "4 451 InuYasha Movie 3: Tenka Hadou no Ken 7.8 \n", + "5 452 InuYasha Movie 1: Toki wo Koeru Omoi 7.56 \n", + "6 969 Tsubasa Chronicle 2nd Season 7.6 \n", + "7 4938 Tsubasa: Shunraiki 8.13 \n", + "8 34055 Berserk 2nd Season 6.69 \n", + "9 2983 Digital Devil Story: Megami Tensei 5.21 \n", + "\n", + " Genres \\\n", + "0 [Action, Adventure, Demons, Drama, Fantasy... \n", + "1 [Action, Adventure, Demons, Drama, Fantasy... \n", + "2 [Action, Adventure, Comedy, Historical, De... \n", + "3 [Action, Adventure, Comedy, Historical, De... \n", + "4 [Action, Adventure, Comedy, Historical, De... \n", + "5 [Action, Adventure, Comedy, Historical, De... \n", + "6 [Action, Adventure, Fantasy, Romance, Supe... \n", + "7 [Action, Adventure, Mystery, Supernatural, ... \n", + "8 [Action, Adventure, Demons, Drama, Fantasy... \n", + "9 [Adventure, Mystery, Horror, Demons, Psych... \n", + "\n", + " English name Japanese name \\\n", + "0 Unknown ベルセルク 第1期ダイジェスト映像 \n", + "1 Berserk 剣風伝奇ベルセルク \n", + "2 InuYasha the Movie 4:Fire on the Mystic Island 犬夜叉 紅蓮の蓬莱島 \n", + "3 InuYasha the Movie 2:The Castle Beyond the Loo... 犬夜叉 鏡の中の夢幻城 \n", + "4 InuYasha the Movie 3:Swords of an Honorable Ruler 犬夜叉 天下覇道の剣 \n", + "5 InuYasha the Movie:Affections Touching Across ... 犬夜叉 時代を越える想い \n", + "6 Tsubasa RESERVoir CHRoNiCLE Season Two ツバサ・クロニクル 第2シリーズ \n", + "7 Tsubasa RESERVoir CHRoNiCLE:Spring Thunder Chr... ツバサ 春雷記 \n", + "8 Berserk:Season II ベルセルク \n", + "9 Unknown デジタル・デビル物語〈ストーリ〉 女神転生 \n", + "\n", + " Type Episodes Aired Premiered ... Score-10 \\\n", + "0 Special 1 Mar 3, 2017 Unknown ... 373.0 \n", + "1 TV 25 Oct 8, 1997 to Apr 1, 1998 Fall 1997 ... 58627.0 \n", + "2 Movie 1 Dec 23, 2004 Unknown ... 5230.0 \n", + "3 Movie 1 Dec 21, 2002 Unknown ... 
6722.0 \n", + "4 Movie 1 Dec 20, 2003 Unknown ... 6718.0 \n", + "5 Movie 1 Dec 22, 2001 Unknown ... 6033.0 \n", + "6 TV 26 Apr 29, 2006 to Nov 4, 2006 Spring 2006 ... 6346.0 \n", + "7 OVA 2 Mar 17, 2009 to May 15, 2009 Unknown ... 4738.0 \n", + "8 TV 12 Apr 7, 2017 to Jun 23, 2017 Spring 2017 ... 5577.0 \n", + "9 OVA 1 Mar 25, 1987 Unknown ... 60.0 \n", + "\n", + " Score-9 Score-8 Score-7 Score-6 Score-5 Score-4 Score-3 Score-2 \\\n", + "0 212.0 433.0 797.0 1019.0 663.0 293.0 183.0 140.0 \n", + "1 65906.0 60815.0 29055.0 9477.0 3899.0 1748.0 671.0 456.0 \n", + "2 6127.0 9865.0 11837.0 5135.0 2190.0 671.0 225.0 92.0 \n", + "3 7566.0 11990.0 12862.0 5409.0 2184.0 607.0 206.0 96.0 \n", + "4 7647.0 11985.0 11322.0 4397.0 1687.0 395.0 160.0 60.0 \n", + "5 6802.0 11048.0 13002.0 5767.0 2369.0 620.0 255.0 104.0 \n", + "6 8909.0 14525.0 13920.0 6024.0 2757.0 1101.0 401.0 215.0 \n", + "7 6561.0 8069.0 5065.0 1749.0 691.0 186.0 74.0 32.0 \n", + "8 6876.0 12904.0 15677.0 9697.0 5290.0 4305.0 2697.0 2122.0 \n", + "9 62.0 170.0 378.0 609.0 634.0 452.0 271.0 165.0 \n", + "\n", + " Score-1 \n", + "0 231.0 \n", + "1 842.0 \n", + "2 73.0 \n", + "3 71.0 \n", + "4 66.0 \n", + "5 93.0 \n", + "6 139.0 \n", + "7 62.0 \n", + "8 2798.0 \n", + "9 83.0 \n", + "\n", + "[10 rows x 35 columns]" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "top_mal_ids = recommendationTable_df.head(10).keys()\n", + "anime_df.set_index('MAL_ID').loc[top_mal_ids].reset_index()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.2" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git 
a/recommender_system/content_based/dataset/archive.zip b/recommender_system/content_based/dataset/archive.zip new file mode 100644 index 0000000..3815e3f Binary files /dev/null and b/recommender_system/content_based/dataset/archive.zip differ