Skip to content

Commit

Permalink
Natural Language Processing Week3
Browse files Browse the repository at this point in the history
  • Loading branch information
jiadaizhao committed Feb 5, 2019
1 parent 78d8379 commit 2befef1
Show file tree
Hide file tree
Showing 15 changed files with 1,504 additions and 1 deletion.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -31,3 +31,5 @@ Deep Learning in Computer Vision/Week5/gan-task/new_samples
Deep Learning in Computer Vision/Week5/gan-task/face_interpolation
*.tsv
Natural Language Processing/Week2/data
Natural Language Processing/Week3/starspace
Natural Language Processing/Week3/StarSpace_embeddings
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
67 changes: 67 additions & 0 deletions Natural Language Processing/Week3/grader.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
import requests
import json
import numpy as np
from collections import OrderedDict

class Grader(object):
    """Collect answers for the assignment parts and submit them to Coursera.

    Answers are stored per part id in ``self.answers`` (``None`` means "not
    answered yet") and sent in a single request by :meth:`submit`.
    """

    def __init__(self):
        self.submission_page = 'https://www.coursera.org/api/onDemandProgrammingScriptSubmissions.v1'
        self.assignment_key = '7DdYfMQFEeevjw7-W7Fr0A'
        # Part id -> human-readable tag; ordered so status() prints in a fixed order.
        self.parts = OrderedDict([('98mDT', 'Question2Vec'),
                                  ('nc7RP', 'HitsCount'),
                                  ('bNp90', 'DCGScore'),
                                  ('3gRlQ', 'W2VTokenizedRanks'),
                                  ('mX6wS', 'StarSpaceRanks')])
        self.answers = {key: None for key in self.parts}

    @staticmethod
    def ravel_output(output):
        """Unwrap a one-element ``np.ndarray`` into its single scalar.

        Any other value (including arrays with more than one element) is
        returned unchanged.
        """
        if isinstance(output, np.ndarray) and output.size == 1:
            output = output.item(0)
        return output

    def submit(self, email, token):
        """POST all stored answers to the submission endpoint and print the outcome.

        Parts without an answer are sent as empty objects.

        Args:
            email: Value sent as ``submitterEmail``.
            token: Value sent as ``secret``.
        """
        submission = {
            "assignmentKey": self.assignment_key,
            "submitterEmail": email,
            "secret": token,
            "parts": {}
        }
        for part, output in self.answers.items():
            if output is not None:
                submission["parts"][part] = {"output": output}
            else:
                submission["parts"][part] = dict()
        request = requests.post(self.submission_page, data=json.dumps(submission))
        if request.status_code == 201:
            # Success does not depend on the body, so do not parse it: a
            # non-JSON body must not turn an accepted submission into a crash.
            print('Submitted to Coursera platform. See results on assignment page!')
            return
        try:
            response = request.json()
        except ValueError:
            # Body is not valid JSON (e.g. an HTML error page).
            response = None
        details = response.get(u'details') if isinstance(response, dict) else None
        if isinstance(details, dict) and u'learnerMessage' in details:
            print(details[u'learnerMessage'])
        else:
            print("Unknown response from Coursera: {}".format(request.status_code))
            print(response if response is not None else request.text)

    def status(self):
        """Print every part with a preview (first 100 characters) of its answer."""
        print("You want to submit these parts:")
        for part_id, part_name in self.parts.items():
            answer = self.answers[part_id]
            if answer is None:
                answer = '-'*10
            # str() so an answer stored as a non-string can still be previewed.
            print("Task {}: {}".format(part_name, str(answer)[:100] + '...'))

    def submit_part(self, part, output):
        """Store ``output`` as the answer for part id ``part`` and print a preview.

        Raises:
            KeyError: If ``part`` is not a known part id. Nothing is stored in
                that case, so a typo cannot leak into the submission payload.
        """
        # Look the name up first: validates the id before mutating state.
        part_name = self.parts[part]
        self.answers[part] = output
        print("Current answer for task {} is: {}".format(part_name, str(output)[:100] + '...'))

    def submit_tag(self, tag, output):
        """Store ``output`` (stringified) for the part whose tag equals ``tag``.

        Raises:
            RuntimeError: If ``tag`` does not match exactly one part.
        """
        part_id = [k for k, v in self.parts.items() if v == tag]
        if len(part_id) != 1:
            raise RuntimeError('cannot match tag with part_id: found {} matches'.format(len(part_id)))
        part_id = part_id[0]
        self.submit_part(part_id, str(self.ravel_output(output)))
18 changes: 18 additions & 0 deletions Natural Language Processing/Week3/util.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
import re
from nltk.corpus import stopwords

# Characters that are replaced by a single space before further cleaning.
# Raw string: '\[', '\]' and '\|' are invalid escapes in a normal string
# literal and trigger a warning on current Python; the pattern is unchanged.
REPLACE_BY_SPACE_RE = re.compile(r'[/(){}\[\]\|@,;]')
# Anything outside digits, lowercase letters, space, '#', '+' and '_' is removed.
GOOD_SYMBOLS_RE = re.compile(r'[^0-9a-z #+_]')
# Set for O(1) membership tests when filtering tokens.
STOPWORDS = set(stopwords.words('english'))
def text_prepare(text):
    """Normalize ``text``: lowercase, strip symbols, drop stopwords.

    Characters matched by ``REPLACE_BY_SPACE_RE`` become spaces, characters
    matched by ``GOOD_SYMBOLS_RE`` are deleted, and tokens found in
    ``STOPWORDS`` are removed. Remaining tokens are joined by single spaces.
    """
    lowered = text.lower()
    spaced = REPLACE_BY_SPACE_RE.sub(' ', lowered)
    cleaned = GOOD_SYMBOLS_RE.sub('', spaced)
    kept = []
    for token in cleaned.split():
        if not token or token in STOPWORDS:
            continue
        kept.append(token)
    return ' '.join(kept).strip()

def array_to_string(arr):
    """Return the items of ``arr`` stringified, one per line."""
    return '\n'.join(map(str, arr))

def matrix_to_string(matrix):
    """Return ``matrix`` as text: tab-separated cells, one row per line."""
    rows = []
    for line in matrix:
        rows.append('\t'.join(map(str, line)))
    return '\n'.join(rows)
Loading

0 comments on commit 2befef1

Please sign in to comment.