Commit cedf25ae authored by Mirza Mohammed Baig

Code Optimization v1

parent 879c5636
...@@ -3,3 +3,4 @@ ASTRA_DB_APPLICATION_TOKEN="AstraCS:EvXpFFafufegdQJvhqlYxmxt:ef86b5996013b12140b ...@@ -3,3 +3,4 @@ ASTRA_DB_APPLICATION_TOKEN="AstraCS:EvXpFFafufegdQJvhqlYxmxt:ef86b5996013b12140b
ASTRA_DB_NAMESPACE="default_keyspace" ASTRA_DB_NAMESPACE="default_keyspace"
HF_TOKEN="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" HF_TOKEN="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"
GROQ_API_KEY="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" GROQ_API_KEY="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"
MONGODB_URL=mongodb+srv://genai:nisum@123@productrater.hpbsn.mongodb.net/?retryWrites=true&w=majority&appName=productRater
from pydantic import BaseModel


class PercentageResponseModel(BaseModel):
    """Response schema holding sentiment percentages and an average rating.

    The base class is pydantic's ``BaseModel`` (as in the pre-refactor
    definition of this model) rather than the class re-exported by the
    ``groq`` SDK, so the API schema does not depend on the LLM client package.
    """

    # Percentages are carried as strings, not numbers.
    positive_percentage: str
    negative_percentage: str
    # Average rating as a float.
    average_rating: float
\ No newline at end of file
# from dataclasses import dataclass, field
# from datetime import datetime
# from typing import Dict
#
# from src.reviewsense.FeatureRatingModel import FeatureRating
#
#
# @dataclass
# class Product:
# product_id: str
# category: str
# features: Dict[str, FeatureRating]
# ratings_distribution: Dict[int, int]
# overall_rating: float = 0.0
# total_reviews: int = 0
# created_at: datetime = field(default_factory=datetime.utcnow)
# updated_at: datetime = field(default_factory=datetime.utcnow)
#
# @classmethod
# def from_dict(cls, data):
# """Convert a dictionary from MongoDB to a Product instance."""
# return cls(
# product_id=data["product_id"],
# category=data["category"],
# features={k: FeatureRating(**v) for k, v in data["features"].items()},
# ratings_distribution=data["ratings_distribution"],
# overall_rating=data["overall_rating"],
# total_reviews=data["total_reviews"],
# created_at=datetime.fromisoformat(data["created_at"]["$date"].replace("Z", "")),
# updated_at=datetime.fromisoformat(data["updated_at"]["$date"].replace("Z", ""))
# )
#
# def to_dict(self):
# """Convert the data class to a dictionary for MongoDB insertion."""
# return {
# "product_id": self.product_id,
# "category": self.category,
# "features": {k: v.__dict__ for k, v in self.features.items()},
# "ratings_distribution": self.ratings_distribution,
# "overall_rating": self.overall_rating,
# "total_reviews": self.total_reviews,
# "created_at": {"$date": self.created_at.isoformat() + "Z"},
# "updated_at": {"$date": self.updated_at.isoformat() + "Z"}
# }
#
#
#
#
from dataclasses import dataclass, field from dataclasses import dataclass, field
from datetime import datetime, timezone from datetime import datetime, timezone
from typing import Dict from typing import Dict
...@@ -61,8 +13,6 @@ class Product: ...@@ -61,8 +13,6 @@ class Product:
ratings_distribution: Dict[str, int] ratings_distribution: Dict[str, int]
overall_rating: float = 0.0 overall_rating: float = 0.0
total_reviews: int = 0 total_reviews: int = 0
# created_at: datetime = field(default_factory=datetime.utcnow)
# updated_at: datetime = field(default_factory=datetime.utcnow)
created_at: datetime = field(default_factory=lambda: datetime.now(timezone.utc)) # Use timezone-aware UTC datetime created_at: datetime = field(default_factory=lambda: datetime.now(timezone.utc)) # Use timezone-aware UTC datetime
updated_at: datetime = field(default_factory=lambda: datetime.now(timezone.utc)) updated_at: datetime = field(default_factory=lambda: datetime.now(timezone.utc))
......
from typing import List, Dict

from pydantic import BaseModel


class RatingSummary(BaseModel):
    """Aggregate rating information returned alongside review ratings.

    The base class is pydantic's ``BaseModel`` (as in the pre-refactor
    definition of this model) rather than the class re-exported by the
    ``groq`` SDK, so the API schema does not depend on the LLM client package.
    """

    # Number of ratings counted.
    ratings_count: int
    # Average rating as a float.
    average_rating: float
    # Maps a string key to an integer count.
    # NOTE(review): keys are presumably star values such as "1".."5" - confirm.
    rating_distribution: Dict[str, int]
    # Despite the name, this is a list of strings, not a count.
    total_reviews: List[str]
\ No newline at end of file
from typing import List, Dict, Optional

from pydantic import BaseModel

from src.reviewsense_ecom.model.RatingSummary import RatingSummary


class ReviewResponse(BaseModel):
    """Response schema pairing per-key review ratings with a rating summary.

    The base class is pydantic's ``BaseModel`` (as in the pre-refactor
    definition of this model) rather than the class re-exported by the
    ``groq`` SDK, so the API schema does not depend on the LLM client package.
    """

    # Maps a string key to a rating; the rating may be None.
    # NOTE(review): keys are presumably feature names - confirm against callers.
    review_ratings: Dict[str, Optional[float]]
    # Nested aggregate summary.
    ratings_summary: RatingSummary
\ No newline at end of file
from typing import List

from pydantic import BaseModel


class ReviewResponseModel(BaseModel):
    """Response schema wrapping a flat list of review strings.

    The base class is pydantic's ``BaseModel`` (as in the pre-refactor
    definition of this model) rather than the class re-exported by the
    ``groq`` SDK, so the API schema does not depend on the LLM client package.
    """

    # Review texts, one string per entry.
    reviews: List[str]
\ No newline at end of file
...@@ -5,12 +5,15 @@ from datetime import datetime, timezone ...@@ -5,12 +5,15 @@ from datetime import datetime, timezone
from src.reviewsense_ecom.model.ProductReview import ProductReview from src.reviewsense_ecom.model.ProductReview import ProductReview
from src.reviewsense_ecom.model.Product import Product from src.reviewsense_ecom.model.Product import Product
from src.reviewsense_ecom.model.productReviewsModel import FeatureSentiment from src.reviewsense_ecom.model.productReviewsModel import FeatureSentiment
from dotenv import load_dotenv
import os
# def get_db_connection(collection_name: str): #LOCAL DB
# client = MongoClient("mongodb://localhost:27017/") # Update with your MongoDB connection
# db = client["productRater"] # Change to your database name
# return db[collection_name] # db["productFeatureRater"] # Change to your collection name
def get_db_N_connection(collection_name: str):  # LOCAL DB
    """Return the named collection from the local ``productRater`` database.

    Builds a ``MongoClient`` for ``mongodb://localhost:27017/``, selects the
    ``productRater`` database and indexes it by ``collection_name``.

    Args:
        collection_name: Name of the collection to look up, e.g.
            ``"productFeatureRater"``.

    Returns:
        The collection object obtained by indexing the database.
    """
    # The URI and database name are hard-coded; change them here to point at
    # a different local instance or database.
    # NOTE(review): a new MongoClient is constructed on every call - confirm
    # whether a shared, module-level client was intended.
    return MongoClient("mongodb://localhost:27017/")["productRater"][collection_name]
def get_db_connection(collection_name: str): #get_db_cloud_connection def get_db_connection(collection_name: str): #get_db_cloud_connection
password="nisum@123" password="nisum@123"
escaped_password = urllib.parse.quote_plus(password) escaped_password = urllib.parse.quote_plus(password)
...@@ -37,9 +40,6 @@ def insert_product(product_id): # Use this method to insert if null ...@@ -37,9 +40,6 @@ def insert_product(product_id): # Use this method to insert if null
def update_product(product_id, update_data): def update_product(product_id, update_data):
print(f"Product ID: {product_id}")
print(f"Update Data: {update_data}")
collection = get_db_connection("productFeatureRater") collection = get_db_connection("productFeatureRater")
# Set the updated timestamp # Set the updated timestamp
......
import logging import logging
from typing import Dict, Optional, List
from fastapi import APIRouter, HTTPException from fastapi import APIRouter, HTTPException
from pydantic import BaseModel
from src.reviewsense_ecom.model.PercentageResponseModel import PercentageResponseModel
from src.reviewsense_ecom.model.ReviewResponseModel import ReviewResponseModel
from src.reviewsense_ecom.service.FeatureUpdater import FeatureUpdater from src.reviewsense_ecom.service.FeatureUpdater import FeatureUpdater
from src.reviewsense_ecom.model.Product import Product from src.reviewsense_ecom.model.Product import Product
from src.reviewsense_ecom.service.ReviewService import ReviewService from src.reviewsense_ecom.service.ReviewService import ReviewService
...@@ -17,29 +19,8 @@ feature_extractor = FeatureExtractor() ...@@ -17,29 +19,8 @@ feature_extractor = FeatureExtractor()
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
class RatingSummary(BaseModel):
ratings_count: int
average_rating: float
rating_distribution: Dict[str, int]
total_reviews: List[str]
class ReviewResponse(BaseModel):
review_ratings: Dict[str, Optional[float]]
ratings_summary: RatingSummary
class PercentageResponseModel(BaseModel):
positive_percentage: str
negative_percentage: str
average_rating: float
class ReviewResponseModel(BaseModel):
reviews: List[str]
@router.post("/calculate_ratings/", response_model=Product) @router.post("/calculate_ratings/", response_model=Product)
async def calculate_ratings(input_data: ProductReviewInput): async def calculate_ratings(input_data: ProductReviewInput): # Required
""" """
Endpoint to calculate feature-specific ratings Endpoint to calculate feature-specific ratings
...@@ -74,13 +55,13 @@ async def calculate_ratings(input_data: ProductReviewInput): ...@@ -74,13 +55,13 @@ async def calculate_ratings(input_data: ProductReviewInput):
logger.info(f"Product Data: {product_data}") logger.info(f"Product Data: {product_data}")
final_product = product_data final_product = product_data
if input_data.is_rating_evaluation_required : if input_data.is_rating_evaluation_required :
final_product = await update_product_data(final_product, input_data, product_data) final_product = await update_product_data( input_data, product_data)
logger.info(f"Final Return to Mongo: {final_product}") logger.info(f"Final Return to Mongo: {final_product}")
return final_product return final_product
@router.get("/fetch_percentage/", response_model=PercentageResponseModel) @router.get("/fetch_percentage/", response_model=PercentageResponseModel)
async def fetch_percentage(product_id: str, features: str): async def fetch_percentage(product_id: str, features: str): #NOT REQUIRED
""" """
Fetch reviews from MongoDB based on product_id and feature. Fetch reviews from MongoDB based on product_id and feature.
""" """
...@@ -92,7 +73,7 @@ async def fetch_percentage(product_id: str, features: str): ...@@ -92,7 +73,7 @@ async def fetch_percentage(product_id: str, features: str):
return {"message": "No data found"} return {"message": "No data found"}
@router.get("/fetch_reviews/", response_model=ReviewResponseModel) @router.get("/fetch_reviews/", response_model=ReviewResponseModel)
async def fetch_review_by_feature(product_id: str, features: str): async def fetch_review_by_feature(product_id: str, features: str): #NotRequired
""" """
Fetch reviews from MongoDB based on product_id and feature. Fetch reviews from MongoDB based on product_id and feature.
""" """
...@@ -103,7 +84,7 @@ async def fetch_review_by_feature(product_id: str, features: str): ...@@ -103,7 +84,7 @@ async def fetch_review_by_feature(product_id: str, features: str):
return {"reviews": reviews} return {"reviews": reviews}
@router.get("/fetch_full_reviews/", response_model=ReviewResponseModel) @router.get("/fetch_full_reviews/", response_model=ReviewResponseModel)
async def fetch_review_by_feature(product_id: str, features: str): async def fetch_review_by_feature(product_id: str, features: str): #Required
""" """
Fetch reviews from MongoDB based on product_id and feature. Fetch reviews from MongoDB based on product_id and feature.
""" """
...@@ -114,14 +95,13 @@ async def fetch_review_by_feature(product_id: str, features: str): ...@@ -114,14 +95,13 @@ async def fetch_review_by_feature(product_id: str, features: str):
return {"reviews": reviews} return {"reviews": reviews}
async def update_product_data(final_product, input_data, product_data): async def update_product_data(input_data, product_data):
# ✅ Second Flow: Update Feature Ratings in Another Collection FROM LLM # ✅ Second Flow: Update Feature Ratings in Another Collection FROM LLM
features=product_feature(input_data.product_id) features=product_feature(input_data.product_id)
reviews_by_feature = feature_extractor.extract_feature_reviews( reviews_by_feature = feature_extractor.extract_feature_reviews(
input_data.new_review, features) input_data.new_review, features)
logger.info(f"REVIEW BY LLM --->{reviews_by_feature}")
ratings = review_service.fetch_feature_ratings(reviews_by_feature) ratings = review_service.fetch_feature_ratings(reviews_by_feature)
logger.info(f"Generated ratings: {ratings}") logger.info(f"Generated ratings: {ratings}")
# ✅ First Flow: Add Review to a Separate Collection TO REVIEWS # ✅ First Flow: Add Review to a Separate Collection TO REVIEWS
......
# router/review_service.py
#
# from typing import List, Dict, Optional
#
# from src.reviewsense.mongo_db_config import get_db_connection
# from src.reviewsense.ratings_fetcher import RatingsFetcher
# from src.reviewsense.review_fetcher import ReviewFetcher
# from src.reviewsense.review_rater import ReviewRater
# from src.reviewsense.review_adder import ReviewAdder
# class ReviewService:
# """Service class for handling product reviews"""
#
# def __init__(self, collection_name: str = "android_phone_reviews",
# embedding_model: str = "sentence-transformers/all-mpnet-base-v2"):
# """
# Initialize the review service with vector database and embeddings
# """
# self.fetcher = ReviewFetcher()
# self.rater = ReviewRater()
# self.adder = ReviewAdder()
# self.ratingsFetcher = RatingsFetcher()
#
# def fetch_reviews(self, product_id: str, features: List[str], threshold: float = 0.5) -> Dict[str, List[str]]:
# return self.fetcher.fetch_reviews(product_id, features, threshold)
#
# def fetch_feature_ratings(self, reviews_by_feature: List[Dict[str, str]]) -> Dict[str, Dict[str, Optional[float]]]:
# return self.rater.generate_feature_ratings(reviews_by_feature)
#
# def add_review(self, product_id: str, review: str, rating: int) -> str:
# return self.adder.add_review(product_id, review, rating)
#
# def fetch_ratings(self, product_id):
# return self.ratingsFetcher.fetch_ratings(product_id=product_id)
from typing import List, Dict, Optional from typing import List, Dict, Optional
from src.reviewsense_ecom.mongo.mongo_db_config import get_db_connection from src.reviewsense_ecom.mongo.mongo_db_config import get_db_connection
from src.reviewsense_ecom.service.ratings_fetcher import RatingsFetcher from src.reviewsense_ecom.service.ratings_fetcher import RatingsFetcher
from src.reviewsense_ecom.service.review_fetcher import ReviewFetcher from src.reviewsense_ecom.service.review_fetcher import ReviewFetcher
...@@ -61,41 +25,13 @@ class ReviewService: ...@@ -61,41 +25,13 @@ class ReviewService:
self.adder = ReviewAdder() self.adder = ReviewAdder()
self.ratingsFetcher = RatingsFetcher() self.ratingsFetcher = RatingsFetcher()
def _get_collection(self, collection_name): def _get_collection(self, collection_name):
"""Returns the requested MongoDB collection.""" """Returns the requested MongoDB collection."""
return get_db_connection(collection_name) return get_db_connection(collection_name)
# def get_review_by_feature(self, product_id, feature, collection_name="productReviews"):
# """
# Fetch reviews from MongoDB based on `product_id` and `feature`
# """
# collection = self._get_collection(collection_name)
#
# query = {"product_id": str(product_id)}
# if feature.lower() == "overall":
# projection = {"_id": 0, "review": 1}
# else:
# query[f"features.{feature}"] = {"$exists": True}
# projection = {
# "_id": 0,
# f"features.{feature}.positive": 1,
# f"features.{feature}.negative": 1
# }
#
# reviews = collection.find(query, projection)
#
# result = []
# for review in reviews:
# if feature.lower() == "overall":
# result.append(review.get("review", ""))
# else:
# feature_data = review.get("features", {}).get(feature, {})
# result.extend(feature_data.get("positive", []))
# result.extend(feature_data.get("negative", []))
#
# print(f"result of Afreedi API ----->>> {result}")
# return result
#WORKING
def get_review_by_feature(self, product_id, feature): def get_review_by_feature(self, product_id, feature):
""" """
Fetch reviews from MongoDB based on `product_id` and `feature` Fetch reviews from MongoDB based on `product_id` and `feature`
...@@ -106,7 +42,7 @@ class ReviewService: ...@@ -106,7 +42,7 @@ class ReviewService:
projection = {"_id": 0, "review": 1, "features": 1} projection = {"_id": 0, "review": 1, "features": 1}
reviews = collection.find(query, projection) reviews = collection.find(query, projection)
p=list(collection.find(query, projection)) list(collection.find(query, projection))
result = [] result = []
for review in reviews: for review in reviews:
...@@ -120,7 +56,6 @@ class ReviewService: ...@@ -120,7 +56,6 @@ class ReviewService:
result.extend(feature_data.get("positive", [])) result.extend(feature_data.get("positive", []))
result.extend(feature_data.get("negative", [])) result.extend(feature_data.get("negative", []))
logger.info(f"results------->>>>> {result}")
return result return result
def get_full_review_by_feature(self, product_id, feature): def get_full_review_by_feature(self, product_id, feature):
......
from pydantic import BaseModel from pydantic import BaseModel
from langchain.prompts import ChatPromptTemplate from langchain.prompts import ChatPromptTemplate
from langchain_core.output_parsers import JsonOutputParser from langchain_core.output_parsers import JsonOutputParser
from typing import List, Optional, Dict from typing import List,Dict
from src.reviewsense_ecom.llm.llm import get_llm from src.reviewsense_ecom.llm.llm import get_llm
import re import re
from dotenv import load_dotenv
import os
# Load environment variables from .env file
load_dotenv()
class FeatureReviews(BaseModel): class FeatureReviews(BaseModel):
feature_reviews: List[dict] # Each review will include a sentence and sentiment feature_reviews: List[dict] # Each review will include a sentence and sentiment
...@@ -12,7 +16,10 @@ class FeatureReviews(BaseModel): ...@@ -12,7 +16,10 @@ class FeatureReviews(BaseModel):
class FeatureExtractor: class FeatureExtractor:
def __init__(self): def __init__(self):
self.llm = get_llm("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") api_key = os.getenv('GROQ_API_KEY')
if not api_key:
raise ValueError("GROQ_API_KEY is not set in the environment variables.")
self.llm = get_llm(api_key)
self.parser = self._create_reviews_parser() self.parser = self._create_reviews_parser()
self.prompt = self._create_extraction_prompt() self.prompt = self._create_extraction_prompt()
...@@ -51,33 +58,6 @@ class FeatureExtractor: ...@@ -51,33 +58,6 @@ class FeatureExtractor:
""" """
return ChatPromptTemplate.from_template(template) return ChatPromptTemplate.from_template(template)
# def extract_feature_reviews(self, reviews: List[str], feature: str) -> List[dict]:
# """
# Extract feature-specific sentences from reviews with sentiment analysis
#
# Args:
# reviews: List of review texts
# feature: Target feature to extract
#
# Returns:
# List[dict]: Feature-specific sentences with sentiment analysis
# """
# try:
# chain = self.prompt | self.llm | self.parser
# reviews_text = "\n".join(reviews)
#
# result = chain.invoke({
# "reviews": reviews_text,
# "feature": feature
# })
#
# parsed_data = FeatureReviews(**result) # Validate and parse the result
# return parsed_data.feature_reviews
#
# except Exception as e:
# print(f"Error extracting feature reviews: {e}")
# return []
def extract_feature_reviews(self, review: str, features: List[str]) -> List[Dict[str, str]]: def extract_feature_reviews(self, review: str, features: List[str]) -> List[Dict[str, str]]:
""" """
Extract feature-specific sentences from reviews with sentiment analysis. Extract feature-specific sentences from reviews with sentiment analysis.
......
...@@ -11,28 +11,6 @@ class ReviewRater: ...@@ -11,28 +11,6 @@ class ReviewRater:
def __init__(self): def __init__(self):
self.sentiment_analyzer = pipeline("sentiment-analysis") self.sentiment_analyzer = pipeline("sentiment-analysis")
# def generate_feature_ratings(self, reviews_by_feature: List[Dict[str, str]]) -> Dict[str, Optional[float]]:
# feature_ratings = {}
#
# for feature, reviews in reviews_by_feature.items():
# if not reviews:
# feature_ratings[feature] = None
# continue
#
# ratings = []
# for review in reviews:
# sentiment = review.get('sentiment')
# if "positive" in sentiment:
# ratings.append(5)
# elif "negative" in sentiment:
# ratings.append(1)
# else:
# ratings.append(3)
#
# feature_ratings[feature] = round(np.mean(ratings), 1) if ratings else None
#
# return feature_ratings
def generate_feature_ratings(self, reviews_by_feature: List[Dict[str, str]]) -> Dict[ def generate_feature_ratings(self, reviews_by_feature: List[Dict[str, str]]) -> Dict[
str, Dict[str, Optional[float]]]: str, Dict[str, Optional[float]]]:
""" """
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment