Commit 4a85dbf2 authored by BRellu

optimize prompt

parent 886c124b
import os
import re
from typing import List, Dict
from dotenv import load_dotenv
......@@ -13,8 +11,15 @@ from src.reviewsense_ecom.llm.llm import get_llm
load_dotenv()
class FeatureReview(BaseModel):
    """A single review fragment extracted for one product feature."""

    # Name of the feature this fragment is about.
    feature: str

    # The extracted text discussing that feature.
    sentence: str

    # Sentiment label; the extraction prompt asks for
    # "positive/negative/neutral".
    sentiment: str

    # Stored as a string; the extraction prompt asks for a
    # "confidence score between 0 and 1".
    confidence: str
class FeatureReviews(BaseModel):
    """Top-level container for the extraction result."""

    # One entry per extracted fragment; each entry carries the feature,
    # the sentence, its sentiment and a confidence value.
    feature_reviews: List[FeatureReview]
class FeatureExtractor:
......@@ -25,40 +30,41 @@ class FeatureExtractor:
def _create_reviews_parser(self) -> JsonOutputParser:
    """Create the JSON parser for feature-specific review extraction.

    Returns:
        A ``JsonOutputParser`` constructed with ``FeatureReviews`` as its
        ``pydantic_object``.
    """
    # A single return: the schema-less ``JsonOutputParser()`` that used to
    # precede this line made the schema-aware parser unreachable.
    return JsonOutputParser(pydantic_object=FeatureReviews)
def _create_extraction_prompt(self) -> ChatPromptTemplate:
    """Create the chat prompt for feature-specific extraction and sentiment analysis.

    The prompt consists of one system message holding the extraction rules
    and the expected JSON layout, followed by two user messages that are
    filled from the ``inputFeatures`` and ``inputReview`` variables.

    Returns:
        The ``ChatPromptTemplate`` built from those three messages.
    """
    # Doubled braces are literal braces in the rendered prompt; the system
    # text itself contains no template variables.
    system_instructions = """Extract sentences about the given feature from the list of reviews.
Rules:
- Extract only parts discussing the specific feature.
- Remove unrelated parts connected by 'and' or 'but'.
- Keep original wording and capitalization.
- If there are multiple sentences related a particular feature in a review, merge them into one.
- If there is only one review, apply the same rules to extract sentences about the feature.
Return only the parts discussing the specific feature and perform sentiment analysis for each extracted sentence in this JSON format:
{{
"feature_reviews": [
{{
"feature" : "feature 1",
"sentence": "relevant sentence 1",
"sentiment": "positive/negative/neutral",
"confidence": "confidence score between 0 and 1"
}},
{{
"feature" : "feature 2",
"sentence": "relevant sentence 2",
"sentiment": "positive/negative/neutral",
"confidence": "confidence score between 0 and 1"
}}
]
}}"""

    prompt = ChatPromptTemplate.from_messages([
        ("system", system_instructions),
        ("user", "{inputFeatures}"),
        ("user", "{inputReview}"),
    ])
    return prompt
def extract_feature_reviews(self, review: str, features: List[str]) -> List[Dict[str, str]]:
    """Extract feature-specific sentences, with sentiment, from a review.

    The whole review and the full feature list are sent through the
    ``prompt | llm | parser`` chain in a single invocation.

    Args:
        review: Raw review text to analyse.
        features: Feature names to look for in the review.

    Returns:
        List[Dict[str, str]]: The ``feature_reviews`` entry of the parsed
        chain output. An empty list is returned when ``review`` or
        ``features`` is empty, or when anything in the chain fails.
    """
    # Nothing to extract: skip the model call entirely.
    if not review or not features:
        return []

    try:
        chain = self.prompt | self.llm | self.parser
        result = chain.invoke({
            "inputFeatures": f"features : {features}",
            "inputReview": review,
        })
        return result['feature_reviews']
    except Exception as e:
        # Best-effort by design: report the failure and hand callers an
        # empty result instead of propagating the error.
        print(f"Error extracting feature reviews: {e}")
        return []
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment