Zwischengespeicherte Methoden, die die gleiche Ausgabe unerwartet in einer Klasse zurückgebenPython

Python-Programme
Anonymous
 Zwischengespeicherte Methoden, die die gleiche Ausgabe unerwartet in einer Klasse zurückgeben

Post by Anonymous »

Ich verwende den Movielens 100K -Bewertungsdatensatz, um ein rein kollaboratives Filterempfehlungssystem zu erstellen. Ich implementiere das Empfehlungssystem mit Klassenmethoden, wobei jede Methode von der Ausgabe früherer abhängt. Um die Leistung zu optimieren, habe ich einen Caching -Dekorateator hinzugefügt, um die Ergebnisse der Methoden zu speichern. Wenn ich jedoch nacheinander mehrere Methoden aufrufe, geben sie alle die gleiche Ausgabe wie die Methode zurück (MAP_TO_RANGE ()) anstelle ihrer jeweiligen Ausgänge. /> < /ul>
Code Beispiel: < /p>

Code: Select all

class Recommend_using_pcc_decorator:

def __init__(self, user_item_rating_matrix, similarity_matrix, top_k_users_dict, no_of_top_k_users, movies_df):

self.user_item_rating_matrix = user_item_rating_matrix
self.similarity_matrix = similarity_matrix
self.top_k_users_dict = top_k_users_dict
self.no_of_top_k_users = no_of_top_k_users
self.movies_df = movies_df

# to store intermediate result
self.min_val = None  # Store the last used min_val
self.max_val = None # Store the last used max_val

# Defining a decorator to cache on subsequent calls, improving performance for repeated operations.
def cache(method):

def wrapper(self, *args):
if not hasattr(self, "_method_cache"):
self._method_cache = {}

# Include relevant instance variables that affect the method’s output
state_key = (self.min_val, self.max_val)
cache_key = (method.__name__, args, state_key)

if cache_key in self._method_cache:
return self._method_cache[cache_key]

result = method(self,*args)
self._method_cache[cache_key] = result
return result

return wrapper

@cache
def mean_centered_predicted_ratings_df(self) ->  pd.DataFrame:

# movie_id_to_col: Maps actual movie IDs to column indices in the NumPy matrix.
movie_id_to_col = {movie_id: idx for idx, movie_id in enumerate(self.user_item_rating_matrix.columns)}

# col_to_movie_id: Maps column indices back to actual movie IDs when retrieving results.
col_to_movie_id = {idx: movie_id for movie_id, idx in movie_id_to_col.items()}  # Reverse mapping

user_item_rating_matrix = self.user_item_rating_matrix.to_numpy()
predicted_ratings = user_item_rating_matrix.copy()

# Initialize a matrix (0-based indexing)
top_k_users_matrix = np.zeros((user_item_rating_matrix.shape[0], self.no_of_top_k_users), dtype=int)

# Convert the dictionary values to 0-based indexing
for user_id in range(1, user_item_rating_matrix.shape[0]):  # Iterate over original user IDs (1 to 610)
top_k_users_matrix[user_id - 1] = np.array(self.top_k_users_dict[user_id]) - 1

# Sum of ratings for each user (row-wise)
user_item_rating_matrix_sum = np.sum(user_item_rating_matrix, axis=1)

# Mask of non-zero elements (True where ratings exist, False for missing ratings)
user_item_rating_matrix_mask = user_item_rating_matrix != 0

# Count of non-zero ratings for each user (to avoid division by zero)
non_zero_rating_count = np.sum(user_item_rating_matrix_mask, axis=1)

# Compute mean ratings, avoiding division by zero
user_mean_ratings = np.true_divide(user_item_rating_matrix_sum,
non_zero_rating_count,
where=(non_zero_rating_count != 0))

user_ids = []
movie_ids = []
predicted_values_list = []

total_users = user_item_rating_matrix.shape[0]

# Iterate over all users
for user in range(total_users):
similar_users = top_k_users_matrix[user]               # Get indices of the top k similar users
similarities = self.similarity_matrix[user, similar_users]  # Get similarity scores

# Get the ratings of the top-k similar users
similar_users_ratings = user_item_rating_matrix[similar_users, :]

# Mask out the movies that have not been rated by similar users
mask = similar_users_ratings != 0  # True where rating is available, False where 0

# Mean ratings of the similar users
mean_rating_sim_users = user_mean_ratings[similar_users]

# Mean centered ratings of similar users
mean_centered_similar_user_ratings = similar_users_ratings - mean_rating_sim_users[:, np.newaxis]

# Numerator calculation
numerator = np.sum(mean_centered_similar_user_ratings * similarities[:, np.newaxis] * mask, axis=0)

# Compute sum of absolute similarities (denominator)
denominator = np.sum(np.abs(similarities[:, np.newaxis]) * mask, axis=0)

# Avoid division by zero
with np.errstate(divide="ignore", invalid="ignore"):
predicted_values = numerator / denominator
predicted_values[denominator == 0] = np.nan

# Adding back the mean to the predicted ratings
mean_centered_predicted_ratings = predicted_values + user_mean_ratings[user]
missing_ratings_mask = predicted_ratings[user, :] == 0
missing_indices = np.where(missing_ratings_mask)[0]

predicted_ratings[user, missing_ratings_mask] = mean_centered_predicted_ratings[missing_ratings_mask]

# Extract predicted ratings corresponding to the previously missing values
predicted_vals = predicted_ratings[user][missing_indices]

# Remove NaN values from predicted values
valid_indices = ~np.isnan(predicted_vals)  # Boolean mask to filter valid values
filtered_movies = missing_indices[valid_indices]
filtered_predictions = predicted_vals[valid_indices]

# Store data in lists (adjusting user_id to start from 1)
user_ids.extend([user + 1] * len(filtered_movies))  # Add 1 to user_id
movie_ids.extend(filtered_movies)
predicted_values_list.extend(filtered_predictions)

# Create DataFrame at the end
recommendations_df = pd.DataFrame({
"userId": user_ids,
"movieId": movie_ids,
"predicted_rating":  predicted_values_list
})

# Convert column indices back to movie IDs before storing in DataFrame
recommendations_df["movieId"] = recommendations_df["movieId"].map(col_to_movie_id)

# Sort DataFrame by predicted rating (descending) for better recommendations
recommendations_df = recommendations_df.sort_values(by=["userId","predicted_rating"], ascending=[True,False])

return recommendations_df

@cache
def sigmoid(self) -> pd.DataFrame:
df = self.mean_centered_predicted_ratings_df()
df["predicted_rating"] = np.reciprocal(1 + np.exp(-df["predicted_rating"]))
return df

@cache
def map_to_range(self, min_val, max_val) -> pd.DataFrame:
df = self.sigmoid()

self.min_val = min_val  # Store the last used min_val
self.max_val = max_val  # Store the last used max_val

df["predicted_rating"] = min_val + (max_val - min_val) * df["predicted_rating"]
return df

@cache
def movie_recommend(self, user_id) -> pd.DataFrame:
"""
Returns the top 5 recommended movies for a user.

Parameters:
- user_id (int): The ID of the user.

Returns:
- A DataFrame with top 5 recommended movies (title, genres).
"""
df = self.map_to_range(self.min_val, self.max_val)

# Get top 5 recommended movie IDs for the user
top_5_recommended_movieids = df[df["userId"] == user_id].head(5)[["movieId"]]

# Merge with movies DataFrame to get movie titles and genres
top_5_recommended_movies = pd.merge(top_5_recommended_movieids, self.movies_df, on="movieId")[["title", "genres"]]

return top_5_recommended_movies

Warum geben alle Methoden das gleiche Ergebnis zurück, nachdem ich MAP_TO_RANGE () ?
Wie kann ich dieses Caching -Problem beheben, während ich sicherstellen kann, dass unabhängige Methoden rechnen. besteht.

Quick Reply

Change Text Case: 
   
  • Similar Topics
    Replies
    Views
    Last post