optimize prompt

4a85dbf2 · BRellu · 886c124b · 4a85dbf2
Commit 4a85dbf2 authored Apr 06, 2025 by BRellu
Hide whitespace changes
Inline Side-by-side

Showing with 43 additions and 52 deletions

FeatureExtractor.py src/reviewsense_ecom/service/FeatureExtractor.py +43 -52

No files found.
--- a/src/reviewsense_ecom/service/FeatureExtractor.py
+++ b/src/reviewsense_ecom/service/FeatureExtractor.py
-import os
-import re
 from typing import List, Dict

 from dotenv import load_dotenv
@@ -13,8 +11,15 @@ from src.reviewsense_ecom.llm.llm import get_llm
 load_dotenv()


+class FeatureReview(BaseModel):
+    feature: str
+    sentence: str
+    sentiment: str
+    confidence: str
+
+
 class FeatureReviews(BaseModel):
-    feature_reviews: List[dict]  # Each review will include a sentence and sentiment
+    feature_reviews: List[FeatureReview]  # Each review includes a feature, sentence, sentiment, and confidence


 class FeatureExtractor:
@@ -25,40 +30,41 @@ class FeatureExtractor:

    def _create_reviews_parser(self) -> JsonOutputParser:
        """Create JSON parser for feature-specific reviews extraction"""
-        return JsonOutputParser()
+        return JsonOutputParser(pydantic_object=FeatureReviews)

    def _create_extraction_prompt(self) -> ChatPromptTemplate:
        """Create prompt for extracting feature-specific reviews with enhanced rules and sentiment analysis."""
-        template = """Extract sentences about the given feature from the list of reviews.
-
-        Rules:
-        - Extract only parts discussing the specific feature.
-        - Remove unrelated parts connected by 'and' or 'but'.
-        - Keep original wording and capitalization.
-        - If there is only one review, apply the same rules to extract sentences about the feature.
-
-        Reviews: {reviews}
-        Feature: {feature}
-
-        Return only the parts discussing the specific feature and perform sentiment analysis for each extracted sentence in this JSON format:
-        {{
-            "feature_reviews": [
-                {{ 
-                    "feature" : {feature}
-                    "sentence": "relevant sentence 1",
-                    "sentiment": "positive/negative/neutral",
-                    "confidence": "confidence score between 0 and 1"
-                }},
+        prompt = ChatPromptTemplate.from_messages([
+            ("system", """Extract sentences about the given feature from the list of reviews.
+
+                Rules:
+                - Extract only parts discussing the specific feature.
+                - Remove unrelated parts connected by 'and' or 'but'.
+                - Keep original wording and capitalization.
+                - If there are multiple sentences related a particular feature in a review, merge them into one.
+                - If there is only one review, apply the same rules to extract sentences about the feature.
+
+                Return only the parts discussing the specific feature and perform sentiment analysis for each extracted sentence in this JSON format:
                {{
-                    "feature" : {feature}
-                    "sentence": "relevant sentence 2",
-                    "sentiment": "positive/negative/neutral",
-                    "confidence": "confidence score between 0 and 1"
-                }}
-            ]
-        }}
-        """
-        return ChatPromptTemplate.from_template(template)
+                    "feature_reviews": [
+                        {{ 
+                            "feature" : "feature 1",
+                            "sentence": "relevant sentence 1",
+                            "sentiment": "positive/negative/neutral",
+                            "confidence": "confidence score between 0 and 1"
+                        }},
+                        {{
+                            "feature" : "feature 2",
+                            "sentence": "relevant sentence 2",
+                            "sentiment": "positive/negative/neutral",
+                            "confidence": "confidence score between 0 and 1"
+                        }}
+                    ]
+                }}"""),
+            ("user", "{inputFeatures}"),
+            ("user", "{inputReview}"),
+        ])
+        return prompt

    def extract_feature_reviews(self, review: str, features: List[str]) -> List[Dict[str, str]]:
        """
@@ -72,25 +78,10 @@ class FeatureExtractor:
            List[Dict[str, str]]: Feature-specific sentences with sentiment analysis.
        """
        try:
-            extracted_reviews = []
-            sentences = re.split(r'(?<=[.!?])\s+', review)  # Split review into sentences
-
-            for feature in features:
-                feature_sentences = [s for s in sentences if feature.lower() in s.lower()]
-
-                for sentence in feature_sentences:
-                    result = self.prompt | self.llm | self.parser
-                    response = result.invoke({
-                        "reviews": sentence,
-                        "feature": feature
-                    })
-
-                    parsed_data = FeatureReviews(**response)  # Validate and parse result
-                    extracted_reviews.extend(parsed_data.feature_reviews)
-            print(f"Responce from LLM : {extracted_reviews}")
-
-            return extracted_reviews
-
+            chain = self.prompt | self.llm | self.parser
+            result = chain.invoke({"inputFeatures": f"features : {features}",
+                                   "inputReview": review})
+            return result['feature_reviews']
        except Exception as e:
            print(f"Error extracting feature reviews: {e}")
            return []