Commit 886c124b authored by Mirza Mohammed Baig

Feature average rating stabilization changes are implemented

parent f9c12b52
...@@ -19,10 +19,7 @@ class FeatureReviews(BaseModel): ...@@ -19,10 +19,7 @@ class FeatureReviews(BaseModel):
class FeatureExtractor: class FeatureExtractor:
def __init__(self):
    """Set up the LLM client, the reviews parser and the extraction prompt.

    The Groq API key is read from the ``GROQ_API_KEY`` environment
    variable and handed to ``get_llm``; it is never embedded in the source.

    Raises:
        ValueError: If ``GROQ_API_KEY`` is unset or empty.
    """
    # Local import keeps this method self-contained even if the module-level
    # ``import os`` was dropped together with the environment lookup.
    import os

    # SECURITY: never place a literal API key here. Any key that has ever
    # been committed to version control must be treated as compromised and
    # revoked/rotated, because it stays readable in the repository history.
    api_key = os.getenv('GROQ_API_KEY')
    if not api_key:
        raise ValueError("GROQ_API_KEY is not set in the environment variables.")

    self.llm = get_llm(api_key)
    self.parser = self._create_reviews_parser()
    self.prompt = self._create_extraction_prompt()
...@@ -49,12 +46,14 @@ class FeatureExtractor: ...@@ -49,12 +46,14 @@ class FeatureExtractor:
{{ {{
"feature" : {feature} "feature" : {feature}
"sentence": "relevant sentence 1", "sentence": "relevant sentence 1",
"sentiment": "positive/negative/neutral" "sentiment": "positive/negative/neutral",
"confidence": "confidence score between 0 and 1"
}}, }},
{{ {{
"feature" : {feature} "feature" : {feature}
"sentence": "relevant sentence 2", "sentence": "relevant sentence 2",
"sentiment": "positive/negative/neutral" "sentiment": "positive/negative/neutral",
"confidence": "confidence score between 0 and 1"
}} }}
] ]
}} }}
......
...@@ -27,17 +27,34 @@ class FeatureUpdater: ...@@ -27,17 +27,34 @@ class FeatureUpdater:
"positive_count": 0, "positive_count": 0,
"negative_count": 0 "negative_count": 0
} }
current_feature = features[feature] current_feature = features[feature]
existing_review_count = current_feature["review_count"] existing_review_count = current_feature["review_count"]
new_rating = new_data.get("rating", 0) or 0 M = 4.5
C = max(2, 10 - (existing_review_count // 20)) # Reduce prior influence faster
# if existing_review_count < 5:
# C = 5
# elif existing_review_count < 50:
# C = 10
# elif existing_review_count < 500:
# C = 15
# else:
# C = 20
new_ratings = new_data.get("ratings", [])
new_rating = sum(new_ratings) / len(new_ratings) if new_ratings else 0
new_positive = new_data.get("positive_count", 0) or 0 new_positive = new_data.get("positive_count", 0) or 0
new_negative = new_data.get("negative_count", 0) or 0 new_negative = new_data.get("negative_count", 0) or 0
# Apply the Bayesian Average Formula
new_average_rating = ( new_average_rating = (
(current_feature["average_rating"] * existing_review_count) + new_rating (current_feature["average_rating"] * existing_review_count) + (
) / (existing_review_count + 1) if existing_review_count > 0 else new_rating C * M) + new_rating
) / (existing_review_count + C + 1) if existing_review_count > 0 else new_rating
print(f"{new_average_rating}+--------------------------$$$----------------")
features[feature]["average_rating"] = round(new_average_rating, 1) features[feature]["average_rating"] = round(new_average_rating, 1)
features[feature]["review_count"] += 1 features[feature]["review_count"] += 1
...@@ -56,4 +73,4 @@ class FeatureUpdater: ...@@ -56,4 +73,4 @@ class FeatureUpdater:
self.product_data["overall_rating"] = round(new_overall_rating, 1) self.product_data["overall_rating"] = round(new_overall_rating, 1)
self.product_data["total_reviews"] = updated_total_reviews self.product_data["total_reviews"] = updated_total_reviews
return self.product_data return self.product_data
\ No newline at end of file
...@@ -14,43 +14,33 @@ class ReviewRater: ...@@ -14,43 +14,33 @@ class ReviewRater:
def generate_feature_ratings(self, reviews_by_feature: List[Dict[str, str]]) -> Dict[
    str, Dict[str, Optional[float]]]:
    """Collect confidence-weighted scores and sentiment counts per feature.

    Each entry is scored on a 1-5 scale around the neutral midpoint 3:
    positive -> ``3 + 2 * confidence``, negative -> ``3 - 2 * confidence``,
    anything else -> ``3``.

    Args:
        reviews_by_feature: Entries read through the keys ``"feature"``,
            ``"sentiment"`` and (optionally) ``"confidence"``.

    Returns:
        A ``defaultdict`` keyed by feature. Each value holds ``"ratings"``
        (the list of individual scores) and the ``"positive_count"``,
        ``"negative_count"`` and ``"neutral_count"`` tallies.
    """
    default_confidence = 1.0  # same weight as when the key is absent

    feature_data = defaultdict(
        lambda: {"ratings": [], "positive_count": 0, "negative_count": 0, "neutral_count": 0}
    )

    for review_data in reviews_by_feature:
        feature = review_data.get("feature")
        sentiment = review_data.get("sentiment")
        # Validate before touching the confidence so an entry that is going
        # to be skipped anyway can never raise.
        if not feature or not sentiment:
            continue

        # The confidence may arrive as a number, a numeric string, None, or
        # free text; a single malformed value must not abort the whole batch.
        try:
            confidence = float(review_data.get("confidence", default_confidence))
        except (TypeError, ValueError):
            confidence = default_confidence
        # Clamp to [0, 1] so every score stays on the 1-5 scale
        # (NaN collapses to 0.0 with this ordering of max/min).
        confidence = min(1.0, max(0.0, confidence))

        # Case-insensitive match so "Positive" / "NEGATIVE" are not
        # silently counted as neutral.
        label = str(sentiment).lower()
        bucket = feature_data[feature]
        if "positive" in label:
            score = 3 + 2 * confidence
            bucket["positive_count"] += 1
        elif "negative" in label:
            score = 3 - 2 * confidence
            bucket["negative_count"] += 1
        else:
            score = 3
            bucket["neutral_count"] += 1
        bucket["ratings"].append(score)

    return feature_data
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment