Commit 4a85dbf2 authored by BRellu

optimize prompt

parent 886c124b
import os
import re
from typing import List, Dict from typing import List, Dict
from dotenv import load_dotenv from dotenv import load_dotenv
...@@ -13,8 +11,15 @@ from src.reviewsense_ecom.llm.llm import get_llm ...@@ -13,8 +11,15 @@ from src.reviewsense_ecom.llm.llm import get_llm
load_dotenv() load_dotenv()
class FeatureReview(BaseModel):
    """One extracted, feature-specific review snippet with its sentiment.

    Instances are the element type of ``FeatureReviews.feature_reviews`` and
    mirror the per-item JSON object that the extraction prompt asks the LLM
    to return.
    """

    # Name of the product feature the sentence is about.
    feature: str
    # The part of the review text that discusses that feature.
    sentence: str
    # Sentiment label; the prompt requests "positive/negative/neutral".
    sentiment: str
    # Confidence score; the prompt requests a value between 0 and 1,
    # and it is carried as text here.
    confidence: str
class FeatureReviews(BaseModel): class FeatureReviews(BaseModel):
feature_reviews: List[dict] # Each review will include a sentence and sentiment feature_reviews: List[FeatureReview] # Each review will include a sentence and sentiment
class FeatureExtractor: class FeatureExtractor:
...@@ -25,40 +30,41 @@ class FeatureExtractor: ...@@ -25,40 +30,41 @@ class FeatureExtractor:
def _create_reviews_parser(self) -> JsonOutputParser: def _create_reviews_parser(self) -> JsonOutputParser:
"""Create JSON parser for feature-specific reviews extraction""" """Create JSON parser for feature-specific reviews extraction"""
return JsonOutputParser() return JsonOutputParser(pydantic_object=FeatureReviews)
def _create_extraction_prompt(self) -> ChatPromptTemplate: def _create_extraction_prompt(self) -> ChatPromptTemplate:
"""Create prompt for extracting feature-specific reviews with enhanced rules and sentiment analysis.""" """Create prompt for extracting feature-specific reviews with enhanced rules and sentiment analysis."""
template = """Extract sentences about the given feature from the list of reviews. prompt = ChatPromptTemplate.from_messages([
("system", """Extract sentences about the given feature from the list of reviews.
Rules:
- Extract only parts discussing the specific feature. Rules:
- Remove unrelated parts connected by 'and' or 'but'. - Extract only parts discussing the specific feature.
- Keep original wording and capitalization. - Remove unrelated parts connected by 'and' or 'but'.
- If there is only one review, apply the same rules to extract sentences about the feature. - Keep original wording and capitalization.
- If there are multiple sentences related a particular feature in a review, merge them into one.
Reviews: {reviews} - If there is only one review, apply the same rules to extract sentences about the feature.
Feature: {feature}
Return only the parts discussing the specific feature and perform sentiment analysis for each extracted sentence in this JSON format:
Return only the parts discussing the specific feature and perform sentiment analysis for each extracted sentence in this JSON format:
{{
"feature_reviews": [
{{
"feature" : {feature}
"sentence": "relevant sentence 1",
"sentiment": "positive/negative/neutral",
"confidence": "confidence score between 0 and 1"
}},
{{ {{
"feature" : {feature} "feature_reviews": [
"sentence": "relevant sentence 2", {{
"sentiment": "positive/negative/neutral", "feature" : "feature 1",
"confidence": "confidence score between 0 and 1" "sentence": "relevant sentence 1",
}} "sentiment": "positive/negative/neutral",
] "confidence": "confidence score between 0 and 1"
}} }},
""" {{
return ChatPromptTemplate.from_template(template) "feature" : "feature 2",
"sentence": "relevant sentence 2",
"sentiment": "positive/negative/neutral",
"confidence": "confidence score between 0 and 1"
}}
]
}}"""),
("user", "{inputFeatures}"),
("user", "{inputReview}"),
])
return prompt
def extract_feature_reviews(self, review: str, features: List[str]) -> List[Dict[str, str]]: def extract_feature_reviews(self, review: str, features: List[str]) -> List[Dict[str, str]]:
""" """
...@@ -72,25 +78,10 @@ class FeatureExtractor: ...@@ -72,25 +78,10 @@ class FeatureExtractor:
List[Dict[str, str]]: Feature-specific sentences with sentiment analysis. List[Dict[str, str]]: Feature-specific sentences with sentiment analysis.
""" """
try: try:
extracted_reviews = [] chain = self.prompt | self.llm | self.parser
sentences = re.split(r'(?<=[.!?])\s+', review) # Split review into sentences result = chain.invoke({"inputFeatures": f"features : {features}",
"inputReview": review})
for feature in features: return result['feature_reviews']
feature_sentences = [s for s in sentences if feature.lower() in s.lower()]
for sentence in feature_sentences:
result = self.prompt | self.llm | self.parser
response = result.invoke({
"reviews": sentence,
"feature": feature
})
parsed_data = FeatureReviews(**response) # Validate and parse result
extracted_reviews.extend(parsed_data.feature_reviews)
print(f"Responce from LLM : {extracted_reviews}")
return extracted_reviews
except Exception as e: except Exception as e:
print(f"Error extracting feature reviews: {e}") print(f"Error extracting feature reviews: {e}")
return [] return []
Markdown is supported
0% Try again or attach a new file
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment