Ich verwende den Movielens 100K -Bewertungsdatensatz, um ein rein kollaboratives Filterempfehlungssystem zu erstellen. Ich implementiere das Empfehlungssystem mit Klassenmethoden, wobei jede Methode von der Ausgabe früherer abhängt. Um die Leistung zu optimieren, habe ich einen Caching -Dekorateator hinzugefügt, um die Ergebnisse der Methoden zu speichern. Wenn ich jedoch nacheinander mehrere Methoden aufrufe, geben sie alle die gleiche Ausgabe wie die Methode zurück (MAP_TO_RANGE ()) anstelle ihrer jeweiligen Ausgänge. /> < /ul>
Code Beispiel: < /p>
class Recommend_using_pcc_decorator:
def __init__(self, user_item_rating_matrix, similarity_matrix, top_k_users_dict, no_of_top_k_users, movies_df):
self.user_item_rating_matrix = user_item_rating_matrix
self.similarity_matrix = similarity_matrix
self.top_k_users_dict = top_k_users_dict
self.no_of_top_k_users = no_of_top_k_users
self.movies_df = movies_df
# to store intermediate result
self.min_val = None # Store the last used min_val
self.max_val = None # Store the last used max_val
# Defining a decorator to cache on subsequent calls, improving performance for repeated operations.
def cache(method):
def wrapper(self, *args):
if not hasattr(self, "_method_cache"):
self._method_cache = {}
# Include relevant instance variables that affect the method’s output
state_key = (self.min_val, self.max_val)
cache_key = (method.__name__, args, state_key)
if cache_key in self._method_cache:
return self._method_cache[cache_key]
result = method(self,*args)
self._method_cache[cache_key] = result
return result
return wrapper
@cache
def mean_centered_predicted_ratings_df(self) -> pd.DataFrame:
# movie_id_to_col: Maps actual movie IDs to column indices in the NumPy matrix.
movie_id_to_col = {movie_id: idx for idx, movie_id in enumerate(self.user_item_rating_matrix.columns)}
# col_to_movie_id: Maps column indices back to actual movie IDs when retrieving results.
col_to_movie_id = {idx: movie_id for movie_id, idx in movie_id_to_col.items()} # Reverse mapping
user_item_rating_matrix = self.user_item_rating_matrix.to_numpy()
predicted_ratings = user_item_rating_matrix.copy()
# Initialize a matrix (0-based indexing)
top_k_users_matrix = np.zeros((user_item_rating_matrix.shape[0], self.no_of_top_k_users), dtype=int)
# Convert the dictionary values to 0-based indexing
for user_id in range(1, user_item_rating_matrix.shape[0]): # Iterate over original user IDs (1 to 610)
top_k_users_matrix[user_id - 1] = np.array(self.top_k_users_dict[user_id]) - 1
# Sum of ratings for each user (row-wise)
user_item_rating_matrix_sum = np.sum(user_item_rating_matrix, axis=1)
# Mask of non-zero elements (True where ratings exist, False for missing ratings)
user_item_rating_matrix_mask = user_item_rating_matrix != 0
# Count of non-zero ratings for each user (to avoid division by zero)
non_zero_rating_count = np.sum(user_item_rating_matrix_mask, axis=1)
# Compute mean ratings, avoiding division by zero
user_mean_ratings = np.true_divide(user_item_rating_matrix_sum,
non_zero_rating_count,
where=(non_zero_rating_count != 0))
user_ids = []
movie_ids = []
predicted_values_list = []
total_users = user_item_rating_matrix.shape[0]
# Iterate over all users
for user in range(total_users):
similar_users = top_k_users_matrix[user] # Get indices of the top k similar users
similarities = self.similarity_matrix[user, similar_users] # Get similarity scores
# Get the ratings of the top-k similar users
similar_users_ratings = user_item_rating_matrix[similar_users, :]
# Mask out the movies that have not been rated by similar users
mask = similar_users_ratings != 0 # True where rating is available, False where 0
# Mean ratings of the similar users
mean_rating_sim_users = user_mean_ratings[similar_users]
# Mean centered ratings of similar users
mean_centered_similar_user_ratings = similar_users_ratings - mean_rating_sim_users[:, np.newaxis]
# Numerator calculation
numerator = np.sum(mean_centered_similar_user_ratings * similarities[:, np.newaxis] * mask, axis=0)
# Compute sum of absolute similarities (denominator)
denominator = np.sum(np.abs(similarities[:, np.newaxis]) * mask, axis=0)
# Avoid division by zero
with np.errstate(divide="ignore", invalid="ignore"):
predicted_values = numerator / denominator
predicted_values[denominator == 0] = np.nan
# Adding back the mean to the predicted ratings
mean_centered_predicted_ratings = predicted_values + user_mean_ratings[user]
missing_ratings_mask = predicted_ratings[user, :] == 0
missing_indices = np.where(missing_ratings_mask)[0]
predicted_ratings[user, missing_ratings_mask] = mean_centered_predicted_ratings[missing_ratings_mask]
# Extract predicted ratings corresponding to the previously missing values
predicted_vals = predicted_ratings[user][missing_indices]
# Remove NaN values from predicted values
valid_indices = ~np.isnan(predicted_vals) # Boolean mask to filter valid values
filtered_movies = missing_indices[valid_indices]
filtered_predictions = predicted_vals[valid_indices]
# Store data in lists (adjusting user_id to start from 1)
user_ids.extend([user + 1] * len(filtered_movies)) # Add 1 to user_id
movie_ids.extend(filtered_movies)
predicted_values_list.extend(filtered_predictions)
# Create DataFrame at the end
recommendations_df = pd.DataFrame({
"userId": user_ids,
"movieId": movie_ids,
"predicted_rating": predicted_values_list
})
# Convert column indices back to movie IDs before storing in DataFrame
recommendations_df["movieId"] = recommendations_df["movieId"].map(col_to_movie_id)
# Sort DataFrame by predicted rating (descending) for better recommendations
recommendations_df = recommendations_df.sort_values(by=["userId","predicted_rating"], ascending=[True,False])
return recommendations_df
@cache
def sigmoid(self) -> pd.DataFrame:
df = self.mean_centered_predicted_ratings_df()
df["predicted_rating"] = np.reciprocal(1 + np.exp(-df["predicted_rating"]))
return df
@cache
def map_to_range(self, min_val, max_val) -> pd.DataFrame:
df = self.sigmoid()
self.min_val = min_val # Store the last used min_val
self.max_val = max_val # Store the last used max_val
df["predicted_rating"] = min_val + (max_val - min_val) * df["predicted_rating"]
return df
@cache
def movie_recommend(self, user_id) -> pd.DataFrame:
"""
Returns the top 5 recommended movies for a user.
Parameters:
- user_id (int): The ID of the user.
Returns:
- A DataFrame with top 5 recommended movies (title, genres).
"""
df = self.map_to_range(self.min_val, self.max_val)
# Get top 5 recommended movie IDs for the user
top_5_recommended_movieids = df[df["userId"] == user_id].head(5)[["movieId"]]
# Merge with movies DataFrame to get movie titles and genres
top_5_recommended_movies = pd.merge(top_5_recommended_movieids, self.movies_df, on="movieId")[["title", "genres"]]
return top_5_recommended_movies
Warum geben alle Methoden das gleiche Ergebnis zurück, nachdem ich MAP_TO_RANGE () ?
Wie kann ich dieses Caching -Problem beheben, während ich sicherstellen kann, dass unabhängige Methoden rechnen. besteht.
Ich verwende den Movielens 100K -Bewertungsdatensatz, um ein rein kollaboratives Filterempfehlungssystem zu erstellen. Ich implementiere das Empfehlungssystem mit Klassenmethoden, wobei jede Methode von der Ausgabe früherer abhängt. Um die Leistung zu optimieren, habe ich einen Caching -Dekorateator hinzugefügt, um die Ergebnisse der Methoden zu speichern. Wenn ich jedoch nacheinander mehrere Methoden aufrufe, geben sie alle die gleiche Ausgabe wie die Methode zurück (MAP_TO_RANGE ()) anstelle ihrer jeweiligen Ausgänge. /> < /ul> Code Beispiel: < /p> [code]class Recommend_using_pcc_decorator:
# movie_id_to_col: Maps actual movie IDs to column indices in the NumPy matrix. movie_id_to_col = {movie_id: idx for idx, movie_id in enumerate(self.user_item_rating_matrix.columns)}
# col_to_movie_id: Maps column indices back to actual movie IDs when retrieving results. col_to_movie_id = {idx: movie_id for movie_id, idx in movie_id_to_col.items()} # Reverse mapping
# Convert the dictionary values to 0-based indexing for user_id in range(1, user_item_rating_matrix.shape[0]): # Iterate over original user IDs (1 to 610) top_k_users_matrix[user_id - 1] = np.array(self.top_k_users_dict[user_id]) - 1
# Sum of ratings for each user (row-wise) user_item_rating_matrix_sum = np.sum(user_item_rating_matrix, axis=1)
# Mask of non-zero elements (True where ratings exist, False for missing ratings) user_item_rating_matrix_mask = user_item_rating_matrix != 0
# Count of non-zero ratings for each user (to avoid division by zero) non_zero_rating_count = np.sum(user_item_rating_matrix_mask, axis=1)
# Compute mean ratings, avoiding division by zero user_mean_ratings = np.true_divide(user_item_rating_matrix_sum, non_zero_rating_count, where=(non_zero_rating_count != 0))
# Iterate over all users for user in range(total_users): similar_users = top_k_users_matrix[user] # Get indices of the top k similar users similarities = self.similarity_matrix[user, similar_users] # Get similarity scores
# Get the ratings of the top-k similar users similar_users_ratings = user_item_rating_matrix[similar_users, :]
# Mask out the movies that have not been rated by similar users mask = similar_users_ratings != 0 # True where rating is available, False where 0
# Mean ratings of the similar users mean_rating_sim_users = user_mean_ratings[similar_users]
# Mean centered ratings of similar users mean_centered_similar_user_ratings = similar_users_ratings - mean_rating_sim_users[:, np.newaxis]
# Compute sum of absolute similarities (denominator) denominator = np.sum(np.abs(similarities[:, np.newaxis]) * mask, axis=0)
# Avoid division by zero with np.errstate(divide="ignore", invalid="ignore"): predicted_values = numerator / denominator predicted_values[denominator == 0] = np.nan
# Adding back the mean to the predicted ratings mean_centered_predicted_ratings = predicted_values + user_mean_ratings[user] missing_ratings_mask = predicted_ratings[user, :] == 0 missing_indices = np.where(missing_ratings_mask)[0]
# Extract predicted ratings corresponding to the previously missing values predicted_vals = predicted_ratings[user][missing_indices]
# Remove NaN values from predicted values valid_indices = ~np.isnan(predicted_vals) # Boolean mask to filter valid values filtered_movies = missing_indices[valid_indices] filtered_predictions = predicted_vals[valid_indices]
# Store data in lists (adjusting user_id to start from 1) user_ids.extend([user + 1] * len(filtered_movies)) # Add 1 to user_id movie_ids.extend(filtered_movies) predicted_values_list.extend(filtered_predictions)
# Create DataFrame at the end recommendations_df = pd.DataFrame({ "userId": user_ids, "movieId": movie_ids, "predicted_rating": predicted_values_list })
# Convert column indices back to movie IDs before storing in DataFrame recommendations_df["movieId"] = recommendations_df["movieId"].map(col_to_movie_id)
# Sort DataFrame by predicted rating (descending) for better recommendations recommendations_df = recommendations_df.sort_values(by=["userId","predicted_rating"], ascending=[True,False])
@cache def movie_recommend(self, user_id) -> pd.DataFrame: """ Returns the top 5 recommended movies for a user.
Parameters: - user_id (int): The ID of the user.
Returns: - A DataFrame with top 5 recommended movies (title, genres). """ df = self.map_to_range(self.min_val, self.max_val)
# Get top 5 recommended movie IDs for the user top_5_recommended_movieids = df[df["userId"] == user_id].head(5)[["movieId"]]
# Merge with movies DataFrame to get movie titles and genres top_5_recommended_movies = pd.merge(top_5_recommended_movieids, self.movies_df, on="movieId")[["title", "genres"]]
return top_5_recommended_movies
[/code] Warum geben alle Methoden das gleiche Ergebnis zurück, nachdem ich MAP_TO_RANGE () ? Wie kann ich dieses Caching -Problem beheben, während ich sicherstellen kann, dass unabhängige Methoden rechnen. besteht.
Ich versuche, die praktische Bedeutung abstrakter Klassen in der objektorientierten Programmierung, insbesondere in PHP, zu verstehen.
Nach meinem Verständnis erzwingen abstrakte Klassen...
Ich verwende Coil Bibliothek in Jetpack komponieren , um Bilder aus dem Netzwerk mit Asyncimage zu laden. Ich habe jedoch festgestellt, dass Asyncimage das Bild zwischengespeichert. Wenn also die...
Ich verwende Coil Bibliothek in Jetpack komponieren , um Bilder aus dem Netzwerk mit Asyncimage zu laden. Ich habe jedoch festgestellt, dass Asyncimage das Bild zwischengespeichert. Wenn also die...
Warum-Sync und --no-sync im DF-Befehl in Linux, gibt mir die gleiche Ausgabe? Dieser df-no-sync und dann habe ich das df-sync flagge ausgeführt, aber beide geben mir die gleiche Ausgabe. Also habe...