Commit 886c124b authored by Mirza Mohammed Baig

Feature average rating stabilization changes are implemented

parent f9c12b52
......@@ -19,10 +19,7 @@ class FeatureReviews(BaseModel):
class FeatureExtractor:
def __init__(self):
    """Set up the LLM client, the reviews parser and the extraction prompt.

    The Groq API key is taken from the ``GROQ_API_KEY`` environment
    variable so that no credential is ever stored in source control.

    Raises:
        ValueError: If ``GROQ_API_KEY`` is unset or empty.
    """
    # NOTE(review): a literal API key was previously passed to get_llm() at
    # this point. Any key that has been committed must be treated as leaked
    # and revoked/rotated; only the environment-provided key is used now.
    api_key = os.getenv('GROQ_API_KEY')
    if not api_key:
        raise ValueError("GROQ_API_KEY is not set in the environment variables.")
    self.llm = get_llm(api_key)
    self.parser = self._create_reviews_parser()
    self.prompt = self._create_extraction_prompt()
......@@ -49,12 +46,14 @@ class FeatureExtractor:
{{
"feature" : {feature}
"sentence": "relevant sentence 1",
"sentiment": "positive/negative/neutral"
"sentiment": "positive/negative/neutral",
"confidence": "confidence score between 0 and 1"
}},
{{
"feature" : {feature}
"sentence": "relevant sentence 2",
"sentiment": "positive/negative/neutral"
"sentiment": "positive/negative/neutral",
"confidence": "confidence score between 0 and 1"
}}
]
}}
......
......@@ -27,17 +27,34 @@ class FeatureUpdater:
"positive_count": 0,
"negative_count": 0
}
current_feature = features[feature]
existing_review_count = current_feature["review_count"]
new_rating = new_data.get("rating", 0) or 0
M = 4.5
C = max(2, 10 - (existing_review_count // 20)) # Reduce prior influence faster
# if existing_review_count < 5:
# C = 5
# elif existing_review_count < 50:
# C = 10
# elif existing_review_count < 500:
# C = 15
# else:
# C = 20
new_ratings = new_data.get("ratings", [])
new_rating = sum(new_ratings) / len(new_ratings) if new_ratings else 0
new_positive = new_data.get("positive_count", 0) or 0
new_negative = new_data.get("negative_count", 0) or 0
# Apply the Bayesian Average Formula
new_average_rating = (
(current_feature["average_rating"] * existing_review_count) + new_rating
) / (existing_review_count + 1) if existing_review_count > 0 else new_rating
(current_feature["average_rating"] * existing_review_count) + (
C * M) + new_rating
) / (existing_review_count + C + 1) if existing_review_count > 0 else new_rating
print(f"{new_average_rating}+--------------------------$$$----------------")
features[feature]["average_rating"] = round(new_average_rating, 1)
features[feature]["review_count"] += 1
......
......@@ -14,43 +14,33 @@ class ReviewRater:
def generate_feature_ratings(self, reviews_by_feature: List[Dict[str, str]]) -> Dict[
        str, Dict[str, Optional[float]]]:
    """
    Generate ratings and count sentiment occurrences for each feature.

    Each review contributes one score on a 1-5 scale centred on 3:
    ``3 + 2 * confidence`` for positive sentiment, ``3 - 2 * confidence``
    for negative sentiment and ``3`` for anything else.

    Args:
        reviews_by_feature (List[Dict[str, str]]): List of feature-specific
            reviews. Each entry is read for the keys ``"feature"``,
            ``"sentiment"`` and (optionally) ``"confidence"``. Entries with
            a missing/empty feature or sentiment are skipped.

    Returns:
        Dict[str, Dict[str, Optional[float]]]: Per feature, a dict with
            ``"ratings"`` (the individual scores), ``"rating"`` (their mean
            rounded to one decimal), ``"positive_count"``,
            ``"negative_count"`` and ``"neutral_count"``.
    """

    def _confidence(raw) -> float:
        # Confidence comes from model output, so it may be None, free text
        # or out of range. Fall back to full confidence (the same default
        # used when the key is absent) and clamp to [0, 1] so every score
        # stays inside the 1-5 scale.
        try:
            value = float(raw)
        except (TypeError, ValueError):
            return 1.0
        if value != value:  # NaN is the only value not equal to itself
            return 1.0
        return min(1.0, max(0.0, value))

    feature_data = defaultdict(
        lambda: {"ratings": [], "positive_count": 0, "negative_count": 0, "neutral_count": 0}
    )

    for review_data in reviews_by_feature:
        feature = review_data.get("feature")
        sentiment = review_data.get("sentiment")
        if not feature or not sentiment:
            continue

        # Parse confidence only after the entry is known to be usable, so a
        # malformed value on a skipped entry cannot abort the whole batch.
        confidence = _confidence(review_data.get("confidence", 1))
        # Case-insensitive match: "Positive" must not be counted as neutral.
        label = str(sentiment).lower()
        stats = feature_data[feature]

        if "positive" in label:
            score = 3 + 2 * confidence
            stats["positive_count"] += 1
        elif "negative" in label:
            score = 3 - 2 * confidence
            stats["negative_count"] += 1
        else:
            score = 3
            stats["neutral_count"] += 1

        stats["ratings"].append(score)

    # Expose the rounded mean as "rating" while keeping the raw "ratings"
    # list, so consumers reading either key get consistent data.
    for stats in feature_data.values():
        ratings = stats["ratings"]
        stats["rating"] = round(sum(ratings) / len(ratings), 1) if ratings else None

    return feature_data
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment