Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
C
cv_filter_tool
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Lokesh Singh
cv_filter_tool
Commits
7d91b281
Commit
7d91b281
authored
Jan 18, 2025
by
LSING46
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
added python code and requirements
parent
1c607c4f
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
96 additions
and
0 deletions
+96
-0
cv_filter_tool.py
src/cv_filter_tool.py
+91
-0
requirements.text
src/requirements.text
+5
-0
No files found.
src/cv_filter_tool.py
0 → 100644
View file @
7d91b281
import
nltk
import
PyPDF2
from
sklearn.metrics.pairwise
import
cosine_similarity
from
transformers
import
BertTokenizer
,
BertModel
import
torch
import
string
# Download the NLTK resources used below (you can comment these out if already done).
# 'stopwords' backs nltk.corpus.stopwords; 'punkt' / 'punkt_tab' back nltk.word_tokenize.
# NLTK >= 3.8.2 loads 'punkt_tab' instead of 'punkt', so both are requested to keep
# word_tokenize working across NLTK versions.
for _resource in ('stopwords', 'punkt', 'punkt_tab'):
    nltk.download(_resource)
# Load the pretrained BERT tokenizer and encoder once, at import time; both come
# from the same checkpoint so the token ids match the model's vocabulary.
_BERT_CHECKPOINT = 'bert-base-uncased'
tokenizer = BertTokenizer.from_pretrained(_BERT_CHECKPOINT)
model = BertModel.from_pretrained(_BERT_CHECKPOINT)
def preprocess_text(text):
    """Normalize *text*: lowercase, strip punctuation, drop English stopwords.

    Every character in ``string.punctuation`` is replaced by a space rather
    than deleted, so tokens separated only by punctuation (e.g. "python,java"
    or "ai/ml") stay separate words instead of fusing into one.

    Args:
        text: The raw string to clean.

    Returns:
        The remaining tokens joined by single spaces.
    """
    # Map each punctuation character to a space (same-length from/to strings).
    punctuation_to_space = str.maketrans(
        string.punctuation, ' ' * len(string.punctuation)
    )
    cleaned = text.lower().translate(punctuation_to_space)

    tokens = nltk.word_tokenize(cleaned)

    # Set membership keeps the per-token stopword check O(1).
    english_stopwords = set(nltk.corpus.stopwords.words('english'))
    return ' '.join(token for token in tokens if token not in english_stopwords)
def extract_pdf_text(file_path):
    """Return the extracted text of every page of the PDF at *file_path*.

    Pages are joined with a newline so the last word of one page does not run
    into the first word of the next. A page whose ``extract_text()`` yields
    nothing (empty or ``None``) contributes an empty string instead of
    breaking the concatenation.

    Args:
        file_path: Path to the PDF file; opened in binary mode.

    Returns:
        A single string containing the text of all pages, in page order.
    """
    with open(file_path, 'rb') as pdf_file:
        reader = PyPDF2.PdfReader(pdf_file)
        # Extraction must happen while the file is still open.
        return '\n'.join(page.extract_text() or '' for page in reader.pages)
def get_bert_embeddings(text):
    """Return a mean-pooled BERT embedding for *text*.

    The text is cleaned with ``preprocess_text``, tokenized (truncated to at
    most 512 tokens), and run through the module-level ``model`` without
    gradient tracking. The last hidden state is averaged over the token
    dimension and size-1 dimensions are squeezed away.
    """
    cleaned = preprocess_text(text)

    # Encode as PyTorch tensors; anything beyond 512 tokens is cut off.
    encoded = tokenizer(
        cleaned,
        return_tensors='pt',
        truncation=True,
        padding=True,
        max_length=512,
    )

    # Inference only, so skip autograd bookkeeping.
    with torch.no_grad():
        outputs = model(**encoded)

    token_states = outputs.last_hidden_state
    pooled = token_states.mean(dim=1)
    return pooled.squeeze()
def match_cv_and_jd(cv_text, jd_text):
    """Return the cosine similarity between the embeddings of a CV and a JD.

    Each text is embedded with ``get_bert_embeddings``; a leading dimension is
    added to each embedding so ``cosine_similarity`` receives two single-row
    inputs, and the only entry of the resulting matrix is returned.
    """
    # Embed the CV first, then the JD, each reshaped to one row.
    cv_row, jd_row = (
        get_bert_embeddings(document).unsqueeze(0)
        for document in (cv_text, jd_text)
    )

    score_matrix = cosine_similarity(cv_row, jd_row)
    return score_matrix[0][0]
# Script entry point: compare one CV PDF against one job-description PDF.
if __name__ == "__main__":
    import sys

    # Usage: python cv_filter_tool.py [CV_PDF [JD_PDF]]
    # Paths may be given on the command line; the original sample file names
    # remain the defaults so running with no arguments behaves as before.
    cli_args = sys.argv[1:]
    cv_path = cli_args[0] if len(cli_args) > 0 else 'DevendraChaturvediAI_ML.pdf'
    jd_path = cli_args[1] if len(cli_args) > 1 else 'jd.pdf'

    # Load CV and JD text from the PDF files
    cv_text = extract_pdf_text(cv_path)
    jd_text = extract_pdf_text(jd_path)

    # Match CV and JD
    similarity_score = match_cv_and_jd(cv_text, jd_text)

    # Print the similarity score
    print(f"Similarity Score: {similarity_score:.4f}")

    # Cut-off on the cosine similarity above which the CV is reported as a match.
    threshold = 0.7
    if similarity_score > threshold:
        print("The CV is a good match for the job description.")
    else:
        print("The CV is not a good match for the job description.")
src/requirements.text
0 → 100644
View file @
7d91b281
nltk
PyPDF2
scikit-learn
transformers
torch
\ No newline at end of file
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment