Skip to content

Commit 88432c6

Browse files
MetalBlazer and root authored
similar question (#31)
* similar question
* requirements added
* ordering improved
* ajax typing

Co-authored-by: root <root@localhost.localdomain>
1 parent 270f54e commit 88432c6

6 files changed

Lines changed: 68 additions & 24 deletions

File tree

requirements.txt

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,4 +12,6 @@ django-compressor==2.2
1212
django-extensions==2.1.3
1313
django-filter==2.0.0
1414
django-debug-toolbar==1.4
15-
python-dotenv==0.10.3
15+
python-dotenv==0.10.3
16+
nltk==3.5
17+
sklearn==0.0

static/website/js/custom.js

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ $(document).ready(function() {
33
$tutorial = $("#id_tutorial");
44
$minute_range = $("#id_minute_range");
55
$second_range = $("#id_second_range");
6+
$title = $("#id_title");
67
var tutorial = $tutorial.val();
78
var category = $category.val();
89

@@ -98,15 +99,16 @@ $(document).ready(function() {
9899
}
99100
});
100101

101-
$second_range.change(function() {
102+
$title.keyup(function() {
102103
$.ajax({
103104
url: "/ajax-similar-questions/",
104105
type: "POST",
105106
data: {
106107
category: $category.val(),
107108
tutorial: $tutorial.val(),
108109
minute_range: $minute_range.val(),
109-
second_range: $second_range.val()
110+
second_range: $second_range.val(),
111+
title: $title.val()
110112
},
111113
dataType: "html",
112114
success: function(data) {

static/website/templates/ajax-similar-questions.html

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
11
<div>
22
<div id="similar-count">
3-
{% if questions.count > 0 %}
4-
{% if questions.count == 1 %}
5-
1 similar question
3+
{% if questions_count > 0 %}
4+
{% if questions_count == 1 %}
5+
1 similar, previously asked question on our Forums
66
{% else %}
7-
{{ questions.count }} similar questions
7+
{{ questions_count }} similar, previously asked questions on our Forums
88
{% endif %}
99
{% endif %}
1010
</div>

static/website/templates/new-question.html

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -31,13 +31,6 @@ <h4>
3131
<div class="col-lg-2 col-md-2 col-sm-2">
3232
{% render_field form.second_range class+="form-control" disabled="disabled" %}
3333
</div>
34-
<div class="col-lg-2 col-md-2 col-sm-2">
35-
<small><strong>
36-
<a id="similar-link" data-toggle="modal" data-target="#similarModal" href="#">
37-
0 similar questions
38-
</a>
39-
</strong></small>
40-
</div>
4134
</div>
4235
<hr>
4336

@@ -48,6 +41,15 @@ <h4>
4841
<label for="id_title">Title:</label>
4942
{% render_field form.title class+="form-control" %}
5043
</div>
44+
{# Link below the title field that opens the "similar questions" modal. #}
<div class="form-group">
    <small><strong>
        <a id="similar-link" data-toggle="modal" data-target="#similarModal" href="#">
            0 similar, previously asked question on our Forums
        </a>
    </strong></small>
</div>
52+
5153
<div class="form-group">
5254
<label for="id_body">Question:</label>
5355
{% render_field form.body class+="form-control" %}

website/helpers.py

Lines changed: 32 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,9 @@
11
import re
2-
2+
from website.models import Question
3+
from nltk.corpus import stopwords
4+
from nltk.tokenize import word_tokenize
5+
from sklearn.metrics.pairwise import cosine_similarity
6+
sw = stopwords.words('english')
37

48
def get_video_info(path):
59
"""Uses ffmpeg to determine information about a video. This has not been broadly
@@ -37,3 +41,30 @@ def prettify(string):
3741
string = re.sub('[^A-Za-z0-9\-]+', '', string)
3842
string = re.sub('-+', '-', string)
3943
return string
44+
45+
def pre_process(text):
    """Normalise raw text ahead of tokenisation.

    The text is lowercased, any ``<!-- ... -->`` style span is deleted, and
    every run of digits and non-word characters is collapsed to one space.

    Args:
        text: The string to normalise.

    Returns:
        The normalised string. Separator runs at either end become a single
        space; they are not stripped.
    """
    text = text.lower()
    # Drop "<!-" / "<!--" ... "-->" spans; "." does not cross newlines here.
    text = re.sub(r"<!--?.*?-->", "", text)
    # Underscores count as word characters, so they survive this step.
    text = re.sub(r"(\d|\W)+", " ", text)
    return text
51+
def clean_user_data(text):
    """Tokenise user-supplied text and drop English stop words.

    Args:
        text: Raw text typed by the user (e.g. a question title).

    Returns:
        A list of tokens, in their original order, with every token found
        in the module-level ``sw`` stop-word list removed.
    """
    # pre_process() already lowercases, so no separate lower() call is needed.
    words = word_tokenize(pre_process(text))
    return [w for w in words if w not in sw]
56+
def get_similar_questions(user_ques, question):
    """Score how similar an existing question is to the user's cleaned text.

    Both sides are turned into binary presence vectors over the combined
    token sequence and compared with cosine similarity.

    Args:
        user_ques: List of tokens for the user's text, as produced by
            ``clean_user_data``.
        question: Raw text of the existing question; it is pre-processed,
            tokenised and stripped of stop words here.

    Returns:
        The cosine similarity of the two vectors, or ``0.0`` when either
        side has no tokens left after cleaning.
    """
    question = [w for w in word_tokenize(pre_process(question)) if w not in sw]

    # With no tokens on either side the vectors would have zero features,
    # which cosine_similarity rejects with ValueError. An empty side cannot
    # match anything, so report "no similarity" directly.
    if not user_ques or not question:
        return 0.0

    user_tokens = set(user_ques)
    question_tokens = set(question)
    # The vector positions come from concatenating both token lists (not
    # from their set union), so shared or repeated words occupy several
    # positions.
    total = user_ques + question
    l1 = [1 if w in user_tokens else 0 for w in total]
    l2 = [1 if w in question_tokens else 0 for w in total]
    cs = cosine_similarity((l1, l2))
    return cs[0][1]

website/views.py

Lines changed: 16 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
from website.models import Question, Answer, Notification, AnswerComment
1313
from spoken_auth.models import TutorialDetails, TutorialResources
1414
from website.forms import NewQuestionForm, AnswerQuesitionForm
15-
from website.helpers import get_video_info, prettify
15+
from website.helpers import get_video_info, prettify, clean_user_data, get_similar_questions
1616
from django.conf import settings
1717
from website.templatetags.permission_tags import can_edit
1818
from spoken_auth.models import FossCategory
@@ -546,15 +546,22 @@ def ajax_answer_comment_update(request):
546546

547547
def ajax_similar_questions(request):
548548
if request.method == 'POST':
549-
category = request.POST['category']
550-
tutorial = request.POST['tutorial']
551-
# minute_range = request.POST['minute_range']
552-
# second_range = request.POST['second_range']
553-
554-
# add more filtering when the forum grows
555-
questions = Question.objects.filter(category=category).filter(tutorial=tutorial)
549+
category = request.POST['category'].replace(' ','-')
550+
tutorial = request.POST['tutorial'].replace(' ','-')
551+
title = request.POST['title']
552+
user_title = clean_user_data(title)
553+
# Increase the threshold as the Forums questions increase
554+
THRESHOLD = 0.3
555+
top_ques = []
556+
questions = Question.objects.filter(category=category,tutorial=tutorial)
557+
for question in questions:
558+
question.similarity= get_similar_questions(user_title,question.title)
559+
if question.similarity >= THRESHOLD:
560+
top_ques.append(question)
561+
top_ques = sorted(top_ques,key=lambda x : x.similarity, reverse=True)
556562
context = {
557-
'questions': questions
563+
'questions': top_ques,
564+
'questions_count':len(top_ques)
558565
}
559566
return render(request, 'website/templates/ajax-similar-questions.html', context)
560567

0 commit comments

Comments
 (0)