Commit b1dc058d authored by BRellu's avatar BRellu

remove the vector db code

parent 7d8d6f87
......@@ -3,5 +3,5 @@
<component name="Black">
<option name="sdkName" value="Poetry (reviewsense-ecom)" />
</component>
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.12 (featurepulse-ecom)" project-jdk-type="Python SDK" />
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.12" project-jdk-type="Python SDK" />
</project>
\ No newline at end of file
......@@ -2,7 +2,7 @@
<module type="PYTHON_MODULE" version="4">
<component name="NewModuleRootManager">
<content url="file://$MODULE_DIR$" />
<orderEntry type="jdk" jdkName="Python 3.12 (featurepulse-ecom)" jdkType="Python SDK" />
<orderEntry type="jdk" jdkName="Python 3.12" jdkType="Python SDK" />
<orderEntry type="sourceFolder" forTests="false" />
</component>
</module>
\ No newline at end of file
from functools import lru_cache
from pydantic.v1 import BaseSettings
class Settings(BaseSettings):
    """Application configuration settings.

    Field values are taken from the environment and from the ``.env`` file
    named in ``Config``; the defaults below are used only when neither
    source provides a value.
    """

    # Astra DB connection settings.
    ASTRA_DB_API_ENDPOINT: str = "https://0c52512b-0c65-4e70-ac47-71edf5244a82-us-east-2.apps.astra.datastax.com"
    # SECURITY: intentionally has no default. Credentials must never live in
    # source control; supply ASTRA_DB_APPLICATION_TOKEN via the environment
    # or the .env file. Instantiating Settings without it fails validation.
    # NOTE(review): the token that used to be hard-coded here is in the
    # repository history and should be revoked/rotated.
    ASTRA_DB_APPLICATION_TOKEN: str
    ASTRA_DB_NAMESPACE: str = "default_keyspace"

    # Default settings
    EMBEDDING_MODEL: str = "sentence-transformers/all-mpnet-base-v2"

    class Config:
        env_file = ".env"
        env_file_encoding = "utf-8"
        extra = "allow"  # Allow extra fields
@lru_cache()
def get_settings():
    """
    Build the application settings once and reuse them afterwards.

    The function takes no arguments, so the ``lru_cache`` wrapper hands the
    same ``Settings`` object back on every call after the first.

    Returns:
        Settings: Configured application settings
    """
    settings = Settings()
    return settings
......@@ -5,7 +5,7 @@ from langchain_core.messages import BaseMessage
from langchain_core.prompts import ChatPromptTemplate
@lru_cache(maxsize=1) # Cache only one instance (singleton behavior)
def get_llm():
def get_llm(groq_token: str):
"""
Get a singleton LLM instance for the given Groq token.
......
......@@ -6,11 +6,3 @@ from bson import ObjectId
class FeatureSentiment:
positive: List[str] = field(default_factory=list)
negative: List[str] = field(default_factory=list)
\ No newline at end of file
@dataclass
class ProductReview:
    """One product review plus the feature sentiments attached to it.

    Every field has a default, so ``ProductReview()`` can be built with no
    arguments.
    """

    # presumably the MongoDB document id; None until one is assigned - TODO confirm
    id: Optional[ObjectId] = None
    product_id: str = ""
    review: str = ""
    rating: float = 0.0
    # default_factory gives each instance its own list instead of a shared one
    features: List[Dict[str, FeatureSentiment]] = field(default_factory=list)
\ No newline at end of file
from dataclasses import dataclass, field
from typing import List, Dict, Optional,Any
from typing import List, Dict, Optional, Any
from bson import ObjectId
from pydantic import Field
@dataclass
class FeatureSentiment:
    """Two lists of strings for a feature: ``positive`` and ``negative``.

    Both default to a fresh empty list per instance.
    """

    # default_factory avoids sharing one list object between instances
    positive: List[str] = field(default_factory=list)
    negative: List[str] = field(default_factory=list)
@dataclass
class ProductReview:
#id: Optional[ObjectId] = None
# id: Optional[ObjectId] = None
product_id: str = ""
review: Optional[str] = Field(default=None, max_length=5000, description="Review text")
rating: float = 0.0
......
from typing import List, Dict, Optional
from groq import BaseModel
from src.reviewsense_ecom.model.RatingSummary import RatingSummary
class ReviewResponse(BaseModel):
    """Response model carrying review ratings alongside a ratings summary."""

    # Maps a string key to a rating; a value may be None.
    review_ratings: Dict[str, Optional[float]]
    ratings_summary: RatingSummary
\ No newline at end of file
......@@ -4,7 +4,7 @@ from pymongo import MongoClient
from datetime import datetime, timezone
from src.reviewsense_ecom.model.ProductReview import ProductReview
from src.reviewsense_ecom.model.Product import Product
from src.reviewsense_ecom.model.productReviewsModel import FeatureSentiment
from src.reviewsense_ecom.model.FeatureSentiment import FeatureSentiment
from dotenv import load_dotenv
import os
......
......@@ -2,7 +2,6 @@
from functools import lru_cache
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_astradb import AstraDBVectorStore
from src.reviewsense_ecom.core.config import get_settings
......@@ -21,7 +20,6 @@ def get_vector_store(
Returns:
AstraDBVectorStore: Configured vector store instance
"""
settings = get_settings()
# Initialize embeddings
embeddings = HuggingFaceEmbeddings(model_name=embedding_model)
......
......@@ -9,7 +9,7 @@ from langchain_core.output_parsers import JsonOutputParser
from langchain_core.prompts import ChatPromptTemplate
from src.reviewsense_ecom.llm.llm import get_llm, generate_response
from src.reviewsense_ecom.service.retrieval import get_vector_store
from src.reviewsense_ecom.retriever.retrieval import get_vector_store
logger = getLogger(__name__)
......
from typing import List, Dict, Optional
from src.reviewsense_ecom.mongo.mongo_db_config import get_db_connection
from src.reviewsense_ecom.service.ratings_fetcher import RatingsFetcher
from src.reviewsense_ecom.service.review_fetcher import ReviewFetcher
from src.reviewsense_ecom.service.review_rater import ReviewRater
from src.reviewsense_ecom.service.review_adder import ReviewAdder
class ReviewService:
......@@ -13,25 +9,18 @@ class ReviewService:
def __init__(self, embedding_model="sentence-transformers/all-mpnet-base-v2"):
    """
    Set up the review service.

    Stores the embedding model name and creates one instance each of
    ReviewFetcher, ReviewRater, ReviewAdder and RatingsFetcher.
    """
    self.embedding_model = embedding_model

    # Collaborators are created in this fixed order.
    self.fetcher = ReviewFetcher()
    self.rater = ReviewRater()
    self.adder = ReviewAdder()
    self.ratingsFetcher = RatingsFetcher()
def _get_collection(self, collection_name):
    """Look up ``collection_name`` through ``get_db_connection`` and return the result."""
    collection = get_db_connection(collection_name)
    return collection
def get_review_by_feature(self, product_id, feature):
"""
Fetch reviews from MongoDB based on `product_id` and `feature`
......@@ -120,22 +109,5 @@ class ReviewService:
"average_rating": average_rating
}
# 🔹 Fetch reviews using vector-based method
def fetch_reviews(self, product_id: str, features: List[str], threshold: float = 0.5) -> Dict[str, List[str]]:
    """Forward the call unchanged to ``self.fetcher.fetch_reviews`` and return its result."""
    reviews_by_feature = self.fetcher.fetch_reviews(product_id, features, threshold)
    return reviews_by_feature
# 🔹 Generate feature-specific ratings
def generate_feature_ratings(self, reviews_by_feature: Dict[str, List[str]]) -> Dict[str, Optional[float]]:
    """Forward ``reviews_by_feature`` to ``self.rater.generate_feature_ratings`` and return its result."""
    ratings = self.rater.generate_feature_ratings(reviews_by_feature)
    return ratings
def fetch_feature_ratings(self, reviews_by_feature: List[Dict[str, str]]) -> Dict[str, Dict[str, Optional[float]]]:
    """Forward ``reviews_by_feature`` to ``self.rater.generate_feature_ratings`` and return its result.

    NOTE(review): this calls the same rater method as
    ``generate_feature_ratings`` although the annotations differ - confirm
    that is intended.
    """
    ratings = self.rater.generate_feature_ratings(reviews_by_feature)
    return ratings
# 🔹 Add a new review
def add_review(self, product_id: str, review: str, rating: int) -> str:
    """Forward the review to ``self.adder.add_review`` and return whatever it returns."""
    result = self.adder.add_review(product_id, review, rating)
    return result
# 🔹 Fetch product ratings
def fetch_ratings(self, product_id: str):
    """Forward to ``self.ratingsFetcher.fetch_ratings`` (keyword ``product_id``) and return its result."""
    ratings = self.ratingsFetcher.fetch_ratings(product_id=product_id)
    return ratings
from pydantic import BaseModel
from langchain.prompts import ChatPromptTemplate
from langchain_core.output_parsers import JsonOutputParser
from typing import List,Dict
from typing import List, Dict
from src.reviewsense_ecom.llm.llm import get_llm
import re
from dotenv import load_dotenv
......@@ -10,6 +10,7 @@ import os
# Load environment variables from .env file
load_dotenv()
class FeatureReviews(BaseModel):
feature_reviews: List[dict] # Each review will include a sentence and sentiment
......
# service/ratings_fetcher.py
from typing import Dict
from src.reviewsense_ecom.service.retrieval import get_vector_store
from src.reviewsense_ecom.service.feature_extractor import FeatureExtractor
from typing import (Any)
class RatingsFetcher:
    """Class for fetching rating statistics from the vector store"""

    def __init__(self):
        self.vector_store = get_vector_store()
        self.feature_extractor = FeatureExtractor()

    def fetch_ratings(self, product_id: str) -> Dict[str, Any]:
        """
        Summarise the ratings of the documents stored for a product.

        Looks up at most 100 documents whose ``title`` metadata equals
        ``product_id`` and aggregates their ``rating`` metadata.

        Args:
            product_id (str): Value matched against the ``title`` metadata.

        Returns:
            Dict[str, Any]: ``ratings_count`` (number of documents found),
            ``average_rating`` (mean over documents that carry a rating,
            rounded to 2 places, ``0`` when none do),
            ``rating_distribution`` (rating-as-string -> occurrence count) and
            ``total_reviews`` (document texts without the ``"Review: "``
            prefix).
        """
        filter_criteria = {"title": product_id}
        documents = self.vector_store.metadata_search(filter=filter_criteria, n=100)

        rating_distribution = {}
        total_reviews = []
        total_rating = 0
        rated_documents = 0

        for doc in documents:
            # removeprefix drops only the literal leading "Review: ".
            # str.lstrip("Review: ") would treat the argument as a set of
            # characters and also eat the start of the review text itself.
            total_reviews.append(doc.page_content.removeprefix("Review: "))

            if 'rating' in doc.metadata:
                rating = str(doc.metadata['rating'])
                rating_distribution[rating] = rating_distribution.get(rating, 0) + 1
                total_rating += float(rating)
                rated_documents += 1

        # Calculate average rating, handling case where there are no ratings
        average_rating = round(total_rating / rated_documents, 2) if rated_documents > 0 else 0

        return {
            "ratings_count": len(documents),
            "average_rating": average_rating,
            "rating_distribution": rating_distribution,
            "total_reviews": total_reviews
        }
# service/review_adder.py
from langchain_community.docstore.document import Document
from src.reviewsense_ecom.service.retrieval import get_vector_store
class ReviewAdder:
    """Class for adding reviews to the vector store"""

    def __init__(self):
        self.vector_store = get_vector_store()

    def add_review(self, product_id: str, review: str, rating: int) -> str:
        """
        Store one review as a document in the vector store.

        The review text becomes the document content; ``product_id`` is
        stored under the ``title`` metadata key and ``rating`` under
        ``rating``.

        Returns:
            str: First element of the list returned by ``add_documents``.
        """
        metadata = {"title": product_id, "rating": rating}
        document = Document(page_content=review, metadata=metadata)
        inserted = self.vector_store.add_documents([document])
        return inserted[0]
# service/review_fetcher.py
from typing import List, Dict
from src.reviewsense_ecom.service.retrieval import get_vector_store
from src.reviewsense_ecom.service.feature_extractor import FeatureExtractor
class ReviewFetcher:
    """Class for fetching reviews from the vector store"""

    def __init__(self):
        self.vector_store = get_vector_store()
        self.feature_extractor = FeatureExtractor()

    def fetch_reviews(self, product_id: str, features: List[str], threshold: float = 0.5) -> Dict[str, List[str]]:
        """
        Collect, per feature, the extracted review snippets for a product.

        For each feature a similarity search (k=100) is run with the feature
        name as the query, restricted to documents whose ``title`` metadata
        equals ``product_id``. Hits scoring at or below ``threshold`` are
        dropped and the rest are passed to the feature extractor.

        Args:
            product_id (str): Value matched against the ``title`` metadata.
            features (List[str]): Feature names to search for.
            threshold (float): Exclusive lower bound on the similarity score.

        Returns:
            Dict[str, List[str]]: Feature -> extracted reviews. Features with
            no qualifying hits or no extracted reviews are omitted.
        """
        feature_reviews = {}

        for feature in features:
            filter_criteria = {"title": product_id}
            documents = self.vector_store.similarity_search_with_score_id(query=feature, k=100, filter=filter_criteria)

            # An empty search result simply produces an empty list here.
            filtered_reviews = [doc.page_content for doc, score, _ in documents if score > threshold]
            if not filtered_reviews:
                continue

            extracted_reviews = self.feature_extractor.extract_feature_reviews(filtered_reviews, feature)
            # Truthiness also skips a None result instead of failing on len(None).
            if extracted_reviews:
                feature_reviews[feature] = extracted_reviews

        return feature_reviews

    def generate_feature_query(self, feature: str) -> str:
        """
        Generate semantic search query for different product features

        Args:
            feature (str): Product feature to search

        Returns:
            str: Semantic search query
        """
        feature_queries = {
            "battery": "battery performance, battery life, charging speed, power consumption, battery drain, long-lasting battery",
            "backup": "data backup, storage capacity, backup options, cloud storage, data protection, file recovery",
            "design": "build quality, aesthetic design, material, form factor, ergonomics, look and feel, physical appearance",
            "display": "screen quality, display resolution, color accuracy, brightness, viewing angles, screen technology, display performance"
        }

        # Default to feature name if not found
        return feature_queries.get(feature.lower(), feature)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment