Intelligent Film Discovery Engine
A sophisticated machine learning system built in Python that analyzes user preferences and movie correlations to deliver personalized film recommendations. Developed for Advanced Python Scripting using the comprehensive MovieLens dataset.
View on GitHub
# Movie Recommendation System - MovieLens Dataset Analysis
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
# Read the MovieLens 25M ratings and movie metadata from the working directory
ratings = pd.read_csv('ratings.csv')
movies = pd.read_csv('movies.csv')

# Report the basic size of the ratings table
print(f"Dataset contains {ratings.shape[0]} ratings")
print(f"Number of unique users: {ratings['userId'].nunique()}")
print(f"Number of unique movies: {ratings['movieId'].nunique()}")

# Reshape to one row per user and one column per movie (cells hold the rating).
# NOTE(review): this is a dense users x movies frame; confirm it fits in memory
# for the dataset size named above.
user_movie_matrix = pd.pivot_table(
    ratings,
    values='rating',
    index='userId',
    columns='movieId',
)
# NOTE(review): user/movie pairs without a rating become 0 here, so the Pearson
# correlation below treats "not rated" as a rating of 0 - confirm this is intended.
user_movie_matrix = user_movie_matrix.fillna(0)

# Pairwise Pearson correlation between movie columns
movie_correlation = user_movie_matrix.corr(method='pearson')
# Collaborative Filtering Implementation
def get_movie_recommendations(movie_title, correlation_matrix, n_recommendations=10,
                              movies_df=None):
    """
    Recommend the movies most correlated with the first title matching *movie_title*.

    Args:
        movie_title: Text to look for in the ``title`` column. It is matched
            case-insensitively as a literal substring (not a regular
            expression), so titles containing characters such as ``(`` or
            ``+`` can be searched safely.
        correlation_matrix: Square DataFrame of movie-to-movie correlations
            whose index and columns are movie IDs.
        n_recommendations: Maximum number of recommendations to return.
        movies_df: Optional DataFrame with ``movieId`` and ``title`` columns.
            Defaults to the module-level ``movies`` table.

    Returns:
        A list of ``(title, correlation)`` tuples ordered from the highest
        correlation to the lowest. If no title matches, or the matched movie
        has no column in *correlation_matrix*, the single-item list
        ``[("Movie not found in database", 0.0)]`` is returned.
        Recommended IDs that have no entry in the movie table are skipped.
    """
    catalogue = movies if movies_df is None else movies_df
    not_found = [("Movie not found in database", 0.0)]

    # Literal, NaN-safe substring match; the first hit decides the seed movie.
    title_hits = catalogue['title'].str.contains(
        movie_title, case=False, regex=False, na=False
    )
    matching_ids = catalogue.loc[title_hits, 'movieId']
    if matching_ids.empty:
        return not_found
    seed_id = matching_ids.iloc[0]

    # A catalogued movie may have no ratings and therefore no matrix column.
    if seed_id not in correlation_matrix.columns:
        return not_found

    # Remove the seed movie by label instead of assuming it sorts first:
    # another movie can tie with it at the top of the ranking.
    correlations = (
        correlation_matrix[seed_id]
        .dropna()
        .drop(labels=seed_id, errors='ignore')
    )
    top = correlations.sort_values(ascending=False).head(n_recommendations)

    # One indexed title lookup instead of filtering the table per recommendation.
    titles = (
        catalogue.loc[catalogue['movieId'].isin(top.index)]
        .drop_duplicates('movieId')
        .set_index('movieId')['title']
    )
    return [
        (titles[rec_id], score)
        for rec_id, score in top.items()
        if rec_id in titles.index
    ]
# Demo: print a ranked list of titles correlated with "Star Wars"
recommendations = get_movie_recommendations("Star Wars", movie_correlation)
for rank, (title, score) in enumerate(recommendations, start=1):
    print(f"{rank}: {title} (Correlation: {score:.3f})")
# Advanced Analytics and Visualization
def analyze_user_preferences(user_id, ratings_df=None, movies_df=None):
    """
    Rank genres by the average rating a single user gave to movies in them.

    Args:
        user_id: Value to match against the ``userId`` column.
        ratings_df: Optional DataFrame with ``userId``, ``movieId`` and
            ``rating`` columns. Defaults to the module-level ``ratings`` table.
        movies_df: Optional DataFrame with ``movieId`` and a ``genres`` column
            holding ``|``-separated genre names. Defaults to the module-level
            ``movies`` table.

    Returns:
        A list of ``(genre, average_rating)`` tuples sorted from the highest
        average to the lowest. A movie tagged with several genres counts once
        for each of them. The list is empty when the user has no ratings that
        join to a movie.
    """
    all_ratings = ratings if ratings_df is None else ratings_df
    catalogue = movies if movies_df is None else movies_df

    # Attach movie metadata to this user's ratings (inner join on movieId).
    user_ratings = all_ratings[all_ratings['userId'] == user_id]
    user_movies = user_ratings.merge(catalogue, on='movieId')
    if user_movies.empty:
        return []

    # One row per (rating, genre) pair, then average per genre. sort=False
    # keeps genres in first-seen order so that ties in the stable sort below
    # resolve the same way as a row-by-row accumulation would.
    avg_genre_ratings = (
        user_movies.assign(genre=user_movies['genres'].str.split('|'))
        .explode('genre')
        .groupby('genre', sort=False)['rating']
        .mean()
    )
    return sorted(avg_genre_ratings.items(), key=lambda item: item[1], reverse=True)
# Generate comprehensive statistics
def generate_dataset_insights(ratings_df=None):
    """
    Summarise a ratings table as a dictionary of headline statistics.

    Args:
        ratings_df: Optional DataFrame with ``userId``, ``movieId`` and
            ``rating`` columns. Defaults to the module-level ``ratings`` table.

    Returns:
        A dict with these entries:
            ``total_ratings``: number of rows in the table.
            ``total_users``: number of distinct ``userId`` values.
            ``total_movies``: number of distinct ``movieId`` values.
            ``average_rating``: mean of the ``rating`` column.
            ``rating_distribution``: mapping of each rating value to its count.
    """
    data = ratings if ratings_df is None else ratings_df

    # NOTE(review): only the 'total_ratings' key survived in the original
    # text; the other four key names were reconstructed from the expressions
    # they label - confirm against any consumer of this dict.
    insights = {
        'total_ratings': len(data),
        'total_users': data['userId'].nunique(),
        'total_movies': data['movieId'].nunique(),
        'average_rating': data['rating'].mean(),
        'rating_distribution': data['rating'].value_counts().to_dict(),
    }
    return insights