Commit 4a85dbf2 authored by BRellu

optimize prompt

parent 886c124b
import os
import re
from typing import List, Dict
from dotenv import load_dotenv
......@@ -13,8 +11,15 @@ from src.reviewsense_ecom.llm.llm import get_llm
load_dotenv()
# One extracted, sentiment-tagged statement about a single product feature.
# Field names mirror the keys of each "feature_reviews" entry in the JSON
# example that the extraction prompt shows to the model.
# NOTE: documented with comments rather than a docstring on purpose, since
# pydantic copies a model docstring into the generated JSON schema.
class FeatureReview(BaseModel):
    feature: str     # name of the feature the sentence talks about
    sentence: str    # text extracted from the review for that feature
    sentiment: str   # prompt asks for "positive", "negative" or "neutral"
    confidence: str  # prompt asks for a score between 0 and 1, kept as a string
# Top-level shape of the model's JSON answer: {"feature_reviews": [...]}.
# The field is declared once, with the typed entry model, instead of the
# stale untyped List[dict] declaration followed by a second one.
class FeatureReviews(BaseModel):
    # Each entry carries the feature, the extracted sentence, its sentiment
    # and a confidence value.
    feature_reviews: List[FeatureReview]
class FeatureExtractor:
......@@ -25,40 +30,41 @@ class FeatureExtractor:
def _create_reviews_parser(self) -> JsonOutputParser:
    """Create JSON parser for feature-specific reviews extraction.

    Returns:
        JsonOutputParser: Parser bound to the ``FeatureReviews`` schema.
    """
    # A single return: the schema-less `JsonOutputParser()` that used to come
    # first made the schema-aware parser unreachable.
    return JsonOutputParser(pydantic_object=FeatureReviews)
def _create_extraction_prompt(self) -> ChatPromptTemplate:
    """Create prompt for extracting feature-specific reviews with enhanced rules and sentiment analysis.

    The prompt has one system message (extraction rules plus the expected
    JSON layout) and two user messages filled from the ``inputFeatures`` and
    ``inputReview`` template variables.

    Returns:
        ChatPromptTemplate: Chat prompt expecting ``inputFeatures`` and
        ``inputReview``.
    """
    # Doubled braces are literal braces in the template; the only template
    # variables are the two user-message placeholders below. The JSON example
    # is kept syntactically valid (commas between all members) so the model
    # is shown exactly the shape the parser expects.
    system_message = """Extract sentences about the given feature from the list of reviews.
Rules:
- Extract only parts discussing the specific feature.
- Remove unrelated parts connected by 'and' or 'but'.
- Keep original wording and capitalization.
- If there are multiple sentences related to a particular feature in a review, merge them into one.
- If there is only one review, apply the same rules to extract sentences about the feature.
Return only the parts discussing the specific feature and perform sentiment analysis for each extracted sentence in this JSON format:
{{
    "feature_reviews": [
        {{
            "feature": "feature 1",
            "sentence": "relevant sentence 1",
            "sentiment": "positive/negative/neutral",
            "confidence": "confidence score between 0 and 1"
        }},
        {{
            "feature": "feature 2",
            "sentence": "relevant sentence 2",
            "sentiment": "positive/negative/neutral",
            "confidence": "confidence score between 0 and 1"
        }}
    ]
}}"""
    prompt = ChatPromptTemplate.from_messages([
        ("system", system_message),
        ("user", "{inputFeatures}"),
        ("user", "{inputReview}"),
    ])
    return prompt
def extract_feature_reviews(self, review: str, features: List[str]) -> List[Dict[str, str]]:
    """
    Extract feature-specific sentences from a review in a single LLM call.

    Args:
        review (str): Review text to analyse.
        features (List[str]): Feature names to look for in the review.

    Returns:
        List[Dict[str, str]]: Feature-specific sentences with sentiment analysis.
        Empty when there is nothing to analyse or when the call fails.
    """
    # Nothing to extract: skip the model round-trip entirely.
    if not features or not review or not review.strip():
        return []

    try:
        # One request covers every feature, instead of one request per
        # (feature, sentence) pair.
        chain = self.prompt | self.llm | self.parser
        result = chain.invoke({
            "inputFeatures": f"features : {features}",
            "inputReview": review,
        })
        return result['feature_reviews']
    except Exception as e:
        # Best-effort boundary: any failure (model error, unparsable output,
        # missing "feature_reviews" key) is reported and yields no reviews.
        print(f"Error extracting feature reviews: {e}")
        return []
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment