Initialize ReviewSense project with core setup

91c75467 · BRellu · Bhargava Rellu · 60259d02 · 91c75467 · 91c75467
Commit 91c75467 authored Jan 24, 2025 by BRellu Committed by Bhargava Rellu Jan 24, 2025
14 changed files
--- a/.env
+++ b/.env
+ASTRA_DB_API_ENDPOINT="https://0c52512b-0c65-4e70-ac47-71edf5244a82-us-east-2.apps.astra.datastax.com"
+ASTRA_DB_APPLICATION_TOKEN="AstraCS:iokawYtcXZopdIyRQIghuror:ea94254f5d22f2a7d3f4d27937bb8455c3cc8a2f23837d99d2557e85c9d5dbe8"
+# NOTE(review): the token values in this file look like live credentials committed to the repository — confirm, rotate them if so, and keep .env out of version control
+ASTRA_DB_NAMESPACE="default_keyspace"
+HF_TOKEN="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"
\ No newline at end of file
--- a/README.md
+++ b/README.md
-# ReviewSense
+# FeaturePulse
-ReviewSense: An AI-powered Product Review Analysis Platform
+FeaturePulse is an AI-driven platform that transforms unstructured product reviews into actionable, feature-specific insights. It uses advanced technologies like semantic review extraction and sentiment analysis to generate intelligent ratings, providing businesses with real-time review insights to improve products and make data-driven decisions.
-Key Features:
+Key Features
- Semantic review extraction using advanced vector search
+Semantic Review Extraction: Leverages advanced vector search to extract meaningful information from product reviews.
- Feature-specific sentiment analysis
+Feature-Specific Sentiment Analysis: Analyzes sentiment for individual product features.
- Intelligent rating generation
+Intelligent Rating Generation: Generates accurate ratings based on feature-specific feedback.
- Real-time review insights for products
+Real-Time Review Insights: Offers real-time analysis of product reviews to assist with timely decision-making.
+Core Technology
+Machine Learning: Sentiment assessment driven by sophisticated ML models.
+Vector Database: Efficient storage and retrieval of product reviews using a vector database.
+Natural Language Processing (NLP): Advanced NLP techniques for review extraction and analysis.
+Use Cases
+E-Commerce Product Evaluation: Analyze product reviews to evaluate customer satisfaction and feature performance.
+Consumer Electronics Feedback Analysis: Focus on specific features of electronic products for detailed feedback insights.
+Market Research Insights: Extract key trends and sentiments from large volumes of product reviews.
+Product Development Feedback: Gain feature-specific insights to guide product improvement and development.
+Technologies
+FastAPI: High-performance web framework for building APIs.
+AstraDB: NoSQL database used for storing and retrieving product reviews efficiently.
+HuggingFace ML Models: State-of-the-art models for sentiment analysis and natural language understanding.
+Langchain: Framework for developing applications powered by large language models (LLMs).
+Unique Value Proposition
+Transform unstructured product reviews into actionable, feature-specific insights using cutting-edge AI technology to support product evaluation, market research, and development.
-Core Technology:
+Steps to run locally:
- Machine learning-driven sentiment assessment
+pip install poetry
- Vector database for efficient review storage and retrieval
+poetry install
- Advanced natural language processing
-Use Cases:
- E-commerce product evaluation
- Consumer electronics feedback analysis
- Market research insights
- Product development feedback
-Technologies:
- FastAPI
- AstraDB
- HuggingFace ML models
- Langchain
-Unique Value Proposition: Transform unstructured product reviews into actionable, feature-specific insights using cutting-edge AI technology.
\ No newline at end of file
--- a/poetry.lock
+++ b/poetry.lock
--- a/pyproject.toml
+++ b/pyproject.toml
+[tool.poetry]
+name = "reviewsense"
+version = "0.1.0"
+description = "AI-powered Product Review Analysis Platform"
+authors = ["Your Name <your.email@example.com>"]
+[tool.poetry.dependencies]
+python = "^3.9"
+fastapi = "^0.115.7"
+uvicorn = "^0.22.0"
+langchain-community = "^0.2.0"
+langchain-huggingface = "^0.0.3"
+langchain-core = "^0.2.0"
+langchain_astradb = "^0.3.3"
+python-dotenv = "^1.0.0"
+transformers = "^4.30.0"
+numpy = "^1.24.0"
+pydantic = "^2.10.6"
+# Required by src/reviewsense/core/config.py (BaseSettings); it is a separate package in pydantic v2
+pydantic-settings = "^2.0.0"
+[tool.poetry.dev-dependencies]
+pytest = "^7.0.0"
+black = "^23.0.0"
+mypy = "^1.0.0"
+isort = "^5.0.0"
+[build-system]
+requires = ["poetry-core>=1.1.0"]
+build-backend = "poetry.core.masonry.api"
--- a/src/reviewsense/api/__pycache__/routes.cpython-310.pyc
+++ b/src/reviewsense/api/__pycache__/routes.cpython-310.pyc
--- a/src/reviewsense/api/routes.py
+++ b/src/reviewsense/api/routes.py
+from fastapi import APIRouter
+from typing import Dict, Optional
+from reviewsense.api.schemas.product_review_input import ProductReviewInput
+from reviewsense.service.ReviewService import ReviewService
+# Router that collects the endpoints declared in this module
+router = APIRouter()
+# Single ReviewService instance, created when this module is imported and used by the endpoint below
+review_service = ReviewService()
+@router.post("/calculate_ratings/", response_model=Dict[str, Optional[float]])
+async def calculate_ratings(input_data: ProductReviewInput):
+    """
+    Endpoint to calculate feature-specific ratings
+    Args:
+        input_data (ProductReviewInput): Input data for review processing
+    Returns:
+        Dictionary of feature ratings
+    """
+    # Add new review if provided
+    if input_data.new_review:
+        review_service.add_review(
+            product_id=input_data.product_id, 
+            review=input_data.new_review
+        )
+    # Fetch and analyze reviews
+    reviews_by_feature = review_service.fetch_reviews(
+        product_id=input_data.product_id, 
+        features=input_data.features
+    )
+    # Generate and return feature ratings
+    return review_service.generate_feature_ratings(reviews_by_feature)
--- a/src/reviewsense/api/schemas/__pycache__/ProductReviewInput.cpython-310.pyc
+++ b/src/reviewsense/api/schemas/__pycache__/ProductReviewInput.cpython-310.pyc
--- a/src/reviewsense/api/schemas/__pycache__/product_review_input.cpython-310.pyc
+++ b/src/reviewsense/api/schemas/__pycache__/product_review_input.cpython-310.pyc
--- a/src/reviewsense/api/schemas/product_review_input.py
+++ b/src/reviewsense/api/schemas/product_review_input.py
+from pydantic import BaseModel, Field
+from typing import List, Optional
+class ProductReviewInput(BaseModel):
+    """Input model for product review processing"""
+    product_id: str = Field(..., description="Unique identifier for the product")
+    features: List[str] = Field(..., description="Features to analyze")
+    new_review: Optional[str] = Field(default=None, description="Optional new review to add")
+    is_rating_evaluation_required: bool = Field(
+        default=True, 
+        description="Flag to determine if rating evaluation is needed"
+    )
+    class Config:
+        """Pydantic model configuration"""
+        json_schema_extra = {
+            "example": {
+                "product_id": "iPhone 15 Pro Max",
+                "features": ["camera", "battery"],
+                "new_review": "Great phone with excellent battery life!",
+                "is_rating_evaluation_required": True
+            }
+        }
\ No newline at end of file
--- a/src/reviewsense/core/__pycache__/config.cpython-310.pyc
+++ b/src/reviewsense/core/__pycache__/config.cpython-310.pyc
--- a/src/reviewsense/core/config.py
+++ b/src/reviewsense/core/config.py
+from pydantic_settings import BaseSettings
+from functools import lru_cache
+class Settings(BaseSettings):
+    """Application configuration settings"""
+    ASTRA_DB_API_ENDPOINT: str
+    ASTRA_DB_APPLICATION_TOKEN: str
+    ASTRA_DB_NAMESPACE: str
+    # Default settings
+    EMBEDDING_MODEL: str = "sentence-transformers/all-mpnet-base-v2"
+    class Config:
+        env_file = ".env"
+        env_file_encoding = "utf-8"
+        extra = "allow"  # Allow extra fields
+@lru_cache()
+def get_settings():
+    """
+    Cached settings retrieval to optimize performance
+    Returns:
+        Settings: Configured application settings
+    """
+    return Settings()
--- a/src/reviewsense/main.py
+++ b/src/reviewsense/main.py
+from fastapi import FastAPI
+from reviewsense.api.routes import router
+from reviewsense.core.config import get_settings
+def create_app() -> FastAPI:
+    """
+    Create and configure the FastAPI application
+    Returns:
+        FastAPI: Configured application instance
+    """
+    app = FastAPI(
+        title="ReviewSense",
+        description="AI-powered Product Review Analysis Platform"
+    )
+    # Include routes
+    app.include_router(router)
+    return app
+# Module-level application instance built by create_app()
+app = create_app()
+# When this file is run as a script, serve the app with uvicorn on host 0.0.0.0, port 8000
+if __name__ == '__main__':
+    import uvicorn
+    uvicorn.run(app, host='0.0.0.0', port=8000)
--- a/src/reviewsense/service/ReviewService.py
+++ b/src/reviewsense/service/ReviewService.py
+import os
+import numpy as np
+from typing import List, Dict, Optional
+from langchain_community.docstore.document import Document
+from langchain_astradb import AstraDBVectorStore
+from langchain_huggingface import HuggingFaceEmbeddings
+from transformers import pipeline
+from reviewsense.core.config import get_settings
+__all__ = ['ReviewService'] 
+class ReviewService:
+    """Service class for handling product reviews"""
+    def __init__(
+        self, 
+        collection_name: str = "android_phone_reviews", 
+        embedding_model: str = "sentence-transformers/all-mpnet-base-v2"
+    ):
+        """
+        Initialize the review service with vector database and embeddings
+        Args:
+            collection_name (str): Name of the vector database collection
+            embedding_model (str): Hugging Face embedding model to use
+        """
+        settings = get_settings()
+        # Initialize embeddings
+        self.embeddings = HuggingFaceEmbeddings(model_name=embedding_model)
+        # Initialize the vector store
+        self.vector_store = AstraDBVectorStore(
+            collection_name=collection_name,
+            embedding=self.embeddings,
+            api_endpoint=settings.ASTRA_DB_API_ENDPOINT,
+            token=settings.ASTRA_DB_APPLICATION_TOKEN,
+            namespace=settings.ASTRA_DB_NAMESPACE,
+        )
+        # Initialize sentiment analyzer
+        self.sentiment_analyzer = pipeline("sentiment-analysis")
+    # [Rest of the methods remain the same as in the original implementation]
+    def fetch_reviews(self, product_id: str, features: List[str], threshold: float = 0.6) -> Dict[str, List[str]]:
+        """
+        Fetch reviews for specific product features
+        Args:
+            product_id (str): Identifier for the product
+            features (List[str]): Features to extract reviews for
+            threshold (float): Similarity threshold for review filtering
+        Returns:
+            Dict containing reviews for each feature
+        """
+        feature_reviews = {}
+        for feature in features:
+            # Perform similarity search for the current feature
+            filter_criteria = {"title": product_id}
+            documents = self.vector_store.similarity_search_with_score_id(
+                query=feature, 
+                k=100, 
+                filter=filter_criteria
+            )
+            # Filter and collect reviews
+            filtered_reviews = [
+                doc.page_content for doc, score, _ in documents if score > threshold
+            ]
+            # Add the list of reviews to the result dictionary
+            feature_reviews[feature] = filtered_reviews
+        return feature_reviews
+    def generate_feature_ratings(self, reviews_by_feature: Dict[str, List[str]]) -> Dict[str, Optional[float]]:
+        """
+        Generate ratings for each feature based on sentiment analysis
+        Args:
+            reviews_by_feature (Dict): Dictionary of reviews grouped by feature
+        Returns:
+            Dictionary of feature ratings
+        """
+        feature_ratings = {}
+        for feature, reviews in reviews_by_feature.items():
+            if not reviews:
+                feature_ratings[feature] = None
+                continue
+            # Analyze sentiment for each review
+            ratings = []
+            for review in reviews:
+                sentiment = self.sentiment_analyzer(review)[0]
+                # Map sentiment to numerical score
+                if "positive" in sentiment["label"].lower():
+                    ratings.append(5)
+                elif "negative" in sentiment["label"].lower():
+                    ratings.append(1)
+                else:  # Neutral sentiment
+                    ratings.append(3)
+            # Compute average rating
+            feature_ratings[feature] = round(np.mean(ratings), 1) if ratings else None
+        return feature_ratings
+    def add_review(self, product_id: str, review: str) -> str:
+        """
+        Add a new review to the vector database
+        Args:
+            product_id (str): Identifier for the product
+            review (str): Review text to add
+        Returns:
+            ID of the added document
+        """
+        # Create a new document with the review
+        review_document = Document(
+            page_content=review, 
+            metadata={"title": product_id}
+        )
+        # Add document to vector store
+        return self.vector_store.add_documents([review_document])[0]
--- a/src/reviewsense/service/__pycache__/ReviewService.cpython-310.pyc
+++ b/src/reviewsense/service/__pycache__/ReviewService.cpython-310.pyc