Commit e3465f0d authored by BRellu

refactor

parent 0b88f5f4
......@@ -11,31 +11,7 @@ router = APIRouter()
review_service = ReviewService()
@router.post("/calculate_ratings/", response_model=Dict[str, Optional[float]])
async def calculate_ratings(input_data: ProductReviewInput):
    """
    Compute one rating per requested feature for a product.

    The request may carry a new review; when it does, that review is stored
    first. Reviews are then fetched per feature and turned into ratings.

    Args:
        input_data (ProductReviewInput): Request payload; this handler reads
            its ``product_id``, ``features`` and ``new_review`` fields.

    Returns:
        Whatever ``review_service.generate_feature_ratings`` produces for the
        fetched reviews (declared as ``Dict[str, Optional[float]]``).
    """
    incoming_review = input_data.new_review
    if incoming_review:
        # Store the submitted review before fetching.
        review_service.add_review(
            product_id=input_data.product_id,
            review=incoming_review,
        )

    grouped_reviews = review_service.fetch_reviews(
        product_id=input_data.product_id,
        features=input_data.features,
    )
    feature_ratings = review_service.generate_feature_ratings(grouped_reviews)
    return feature_ratings
@router.post("/chat")
async def chat(user_message: UserMessage):
......
......@@ -2,4 +2,4 @@ from pydantic import BaseModel, Field
from typing import List, Optional
class UserMessage(BaseModel):
message: str = "suggest me a mobile with okay battery backup"
\ No newline at end of file
message: str = "suggest me a mobile with good battery backup"
\ No newline at end of file
from functools import lru_cache
from pydantic.v1 import BaseSettings
class Settings(BaseSettings):
    """Application configuration settings.

    Fields are populated by pydantic's ``BaseSettings`` from the process
    environment and from the ``.env`` file named in ``Config``; the defaults
    declared here apply only when neither source supplies a value.
    """

    ASTRA_DB_API_ENDPOINT: str = "https://0c52512b-0c65-4e70-ac47-71edf5244a82-us-east-2.apps.astra.datastax.com"
    # SECURITY: the application token is a credential and must not live in
    # source control, so it has no default. Provide it through the
    # ASTRA_DB_APPLICATION_TOKEN environment variable or the .env file;
    # constructing Settings without it fails with a validation error instead
    # of silently using a committed secret. The token that used to be
    # hard-coded here should be treated as leaked and rotated.
    ASTRA_DB_APPLICATION_TOKEN: str
    ASTRA_DB_NAMESPACE: str = "default_keyspace"
    # Default settings
    EMBEDDING_MODEL: str = "sentence-transformers/all-mpnet-base-v2"

    class Config:
        env_file = ".env"
        env_file_encoding = "utf-8"
        extra = "allow"  # Allow extra fields
@lru_cache()
def get_settings():
    """
    Return the application settings, building them only on the first call.

    The function takes no arguments, so ``lru_cache`` keeps a single result
    and hands the same ``Settings`` object back on every later call.

    Returns:
        Settings: Configured application settings
    """
    settings = Settings()
    return settings
......@@ -61,41 +61,6 @@ class BotService:
logger.error(f"Failed to initialize vector store: {e}")
raise VectorStoreError(f"Vector store initialization failed: {e}")
def get_product_reviews(
    self,
    product_id: str,
    query: str,
    k: int = 1
) -> str:
    """
    Retrieve and combine product reviews from the vector store.

    Args:
        product_id: Product identifier; used as the ``product_name`` filter value
        query: Search query
        k: Number of results to retrieve

    Returns:
        Combined review string: every processed review joined with a single
        space (empty string when the search returns nothing)

    Raises:
        VectorStoreError: If the search or the review processing fails. The
            original exception is chained as ``__cause__`` so its traceback
            is not lost.
    """
    try:
        results = self.vector_store.similarity_search_with_score_id(
            query=query,
            k=k,
            filter={"product_name": product_id}
        )
        all_reviews = []
        # Each hit is unpacked as a 3-tuple; only the document itself is used.
        for doc, _, _ in results:
            all_reviews.extend(ReviewProcessor.process_reviews(doc.page_content))
        return ' '.join(all_reviews)
    except Exception as e:
        logger.error(f"Error retrieving product reviews: {e}")
        # Chain explicitly so callers can still inspect the root cause.
        raise VectorStoreError(f"Failed to retrieve reviews: {e}") from e
def retrieve_context(
self,
query: str,
......
# services/review_service.py
from typing import List, Dict, Optional
from reviewsense.services.review_fetcher import ReviewFetcher
from reviewsense.services.review_rater import ReviewRater
from reviewsense.services.review_adder import ReviewAdder
class ReviewService:
    """Service class for handling product reviews.

    Thin facade: each public method forwards to one collaborator
    (``ReviewFetcher``, ``ReviewRater`` or ``ReviewAdder``).
    """

    def __init__(self, collection_name: str = "android_phone_reviews", embedding_model: str = "sentence-transformers/all-mpnet-base-v2"):
        """
        Initialize the review service with its fetcher, rater and adder.

        NOTE(review): ``collection_name`` and ``embedding_model`` are accepted
        but not used by this constructor; the collaborators are built with
        their own defaults. They are kept so existing callers keep working.
        """
        self.fetcher = ReviewFetcher()
        self.rater = ReviewRater()
        self.adder = ReviewAdder()

    def fetch_reviews(self, product_id: str, features: List[str], threshold: float = 0.6) -> Dict[str, List[dict]]:
        """
        Fetch feature-specific review entries for a product.

        Args:
            product_id: Product identifier
            features: Feature names to look up
            threshold: Score cut-off forwarded to the fetcher

        Returns:
            Mapping of feature name to the list of extracted review entries
            (dicts, as produced by the fetcher's feature extractor)
        """
        return self.fetcher.fetch_reviews(product_id, features, threshold)

    def generate_feature_ratings(self, reviews_by_feature: Dict[str, List[dict]]) -> Dict[str, Optional[float]]:
        """
        Turn per-feature review entries into per-feature ratings.

        Args:
            reviews_by_feature: Mapping of feature name to review entries, in
                the shape returned by ``fetch_reviews``

        Returns:
            Mapping of feature name to rating, as returned by the rater
        """
        return self.rater.generate_feature_ratings(reviews_by_feature)

    def add_review(self, product_id: str, review: str) -> str:
        """
        Store a new review for a product.

        Args:
            product_id: Product identifier
            review: Review text

        Returns:
            The value returned by the adder for the stored review
        """
        return self.adder.add_review(product_id, review)
from pydantic import BaseModel
from langchain.prompts import ChatPromptTemplate
from langchain_core.output_parsers import JsonOutputParser
from typing import List, Optional
from reviewsense.core.llm import get_llm
class FeatureReviews(BaseModel):
    # Schema used to validate the parsed extraction result: the payload must
    # carry a "feature_reviews" list whose items are dicts.
    feature_reviews: List[dict]  # Each review will include a sentence and sentiment
class FeatureExtractor:
    """Extracts feature-specific sentences (with sentiment) from reviews via the LLM."""

    def __init__(self):
        """Set up the LLM, the JSON output parser and the extraction prompt."""
        self.llm = get_llm()
        self.parser = self._create_reviews_parser()
        self.prompt = self._create_extraction_prompt()

    def _create_reviews_parser(self) -> JsonOutputParser:
        """Create JSON parser for feature-specific reviews extraction"""
        return JsonOutputParser()

    def _create_extraction_prompt(self) -> ChatPromptTemplate:
        """Create prompt for extracting feature-specific reviews with enhanced rules and sentiment analysis."""
        # Doubled braces are literal braces in the template; {reviews} and
        # {feature} are the two variables filled in at invoke time.
        template = """Extract sentences about the given feature from the list of reviews.
Rules:
- Extract only parts discussing the specific feature.
- Remove unrelated parts connected by 'and' or 'but'.
- Keep original wording and capitalization.
- If there is only one review, apply the same rules to extract sentences about the feature.
Reviews: {reviews}
Feature: {feature}
Return only the parts discussing the specific feature and perform sentiment analysis for each extracted sentence in this JSON format:
{{
"feature_reviews": [
{{
"sentence": "relevant sentence 1",
"sentiment": "positive/negative/neutral"
}},
{{
"sentence": "relevant sentence 2",
"sentiment": "positive/negative/neutral"
}}
]
}}
"""
        return ChatPromptTemplate.from_template(template)

    def extract_feature_reviews(self, reviews: List[str], feature: str) -> List[dict]:
        """
        Extract feature-specific sentences from reviews with sentiment analysis

        Args:
            reviews: List of review texts
            feature: Target feature to extract

        Returns:
            List[dict]: Feature-specific sentences with sentiment analysis.
            An empty list is returned when ``reviews`` is empty or when the
            extraction fails for any reason (best-effort: errors are logged,
            never raised).
        """
        import logging

        # Nothing to extract from: skip the LLM round-trip entirely.
        if not reviews:
            return []

        try:
            chain = self.prompt | self.llm | self.parser
            reviews_text = "\n".join(reviews)
            result = chain.invoke({
                "reviews": reviews_text,
                "feature": feature
            })
            parsed_data = FeatureReviews(**result)  # Validate and parse the result
            return parsed_data.feature_reviews
        except Exception as e:
            # Deliberately best-effort, but keep the traceback in the log
            # instead of printing a bare message to stdout.
            logging.getLogger(__name__).exception("Error extracting feature reviews: %s", e)
            return []
# services/review_adder.py
from langchain_community.docstore.document import Document
from .retrieval import get_vector_store
class ReviewAdder:
    """Writes individual product reviews into the vector store."""

    def __init__(self):
        """Obtain the vector store via ``get_vector_store``."""
        self.vector_store = get_vector_store()

    def add_review(self, product_id: str, review: str) -> str:
        """
        Wrap a review in a ``Document`` and add it to the vector store.

        Args:
            product_id: Product identifier, stored under the ``title`` metadata key
            review: Review text, stored as the document content

        Returns:
            The first element of what ``add_documents`` returns for the
            single inserted document (presumably its id; confirm against the
            vector store in use)
        """
        metadata = {"title": product_id}
        document = Document(page_content=review, metadata=metadata)
        inserted = self.vector_store.add_documents([document])
        return inserted[0]
# services/review_fetcher.py
from typing import List, Dict
from .retrieval import get_vector_store
from .feature_extractor import FeatureExtractor
class ReviewFetcher:
    """Class for fetching reviews from the vector store"""

    def __init__(self):
        """Obtain the vector store and build the feature extractor."""
        self.vector_store = get_vector_store()
        self.feature_extractor = FeatureExtractor()

    def fetch_reviews(self, product_id: str, features: List[str], threshold: float = 0.6) -> Dict[str, List[dict]]:
        """
        Collect feature-specific review entries for one product.

        For every feature a similarity search is run with the query from
        ``generate_feature_query``, restricted to documents whose ``title``
        metadata equals ``product_id``. Hits scoring above ``threshold`` are
        passed to the feature extractor.

        Args:
            product_id: Product identifier used as the ``title`` filter
            features: Feature names to search for
            threshold: Hits must score strictly above this value to be kept

        Returns:
            Mapping of feature name to the extractor's entries. Features with
            no qualifying hits or no extracted entries are omitted.
        """
        feature_reviews = {}
        for feature in features:
            # The search must be scoped to the product and must return
            # (document, score, id) triples, because the filtering below
            # unpacks exactly that shape.
            documents = self.vector_store.similarity_search_with_score_id(
                query=self.generate_feature_query(feature),
                k=100,
                filter={"title": product_id},
            )
            filtered_reviews = [doc.page_content for doc, score, _ in documents if score > threshold]
            if not filtered_reviews:
                continue
            extracted_reviews = self.feature_extractor.extract_feature_reviews(filtered_reviews, feature)
            if extracted_reviews:
                feature_reviews[feature] = extracted_reviews
        return feature_reviews

    def generate_feature_query(self, feature: str) -> str:
        """
        Generate semantic search query for different product features

        Args:
            feature (str): Product feature to search (matched case-insensitively)

        Returns:
            str: Semantic search query; the feature name itself, unchanged,
            when no expanded query is defined for it
        """
        feature_queries = {
            "battery": "battery performance, battery life, charging speed, power consumption, battery drain, long-lasting battery",
            "backup": "data backup, storage capacity, backup options, cloud storage, data protection, file recovery",
            "design": "build quality, aesthetic design, material, form factor, ergonomics, look and feel, physical appearance",
            "display": "screen quality, display resolution, color accuracy, brightness, viewing angles, screen technology, display performance"
        }
        # Default to feature name if not found
        return feature_queries.get(feature.lower(), feature)
# services/review_rater.py
from typing import Dict, List, Optional
import numpy as np
from langchain_huggingface import HuggingFaceEmbeddings
from transformers import pipeline
class ReviewRater:
    """Class for generating feature-specific ratings based on sentiment analysis"""

    def __init__(self):
        # NOTE(review): nothing in this class reads sentiment_analyzer; the
        # ratings below rely on the sentiment label already attached to each
        # review entry. The attribute is kept so existing users of it keep
        # working; consider dropping it if it has none.
        self.sentiment_analyzer = pipeline("sentiment-analysis")

    def generate_feature_ratings(self, reviews_by_feature: Dict[str, List[dict]]) -> Dict[str, Optional[float]]:
        """
        Average per-review sentiment scores into one rating per feature.

        Each review entry contributes 5 (positive), 1 (negative) or
        3 (anything else, including a missing or non-string sentiment).

        Args:
            reviews_by_feature: Mapping of feature name to review entries;
                each entry is a dict whose ``sentiment`` key holds the label

        Returns:
            Mapping of feature name to the mean score rounded to one decimal,
            or ``None`` for a feature that has no reviews
        """
        feature_ratings = {}
        for feature, reviews in reviews_by_feature.items():
            if not reviews:
                feature_ratings[feature] = None
                continue
            ratings = [self._sentiment_to_rating(review) for review in reviews]
            # float() so callers get a plain Python float, not a NumPy scalar.
            feature_ratings[feature] = round(float(np.mean(ratings)), 1)
        return feature_ratings

    @staticmethod
    def _sentiment_to_rating(review: dict) -> int:
        """Map one review entry's sentiment label to a 1/3/5 score."""
        sentiment = review.get('sentiment')
        # A missing or non-string label is treated as neutral instead of
        # raising on the substring test; matching ignores letter case.
        label = sentiment.lower() if isinstance(sentiment, str) else ""
        if "positive" in label:
            return 5
        if "negative" in label:
            return 1
        return 3
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment