Skip to content

Commit

Permalink
refactor: add jplag plagiarism link support for group tasks
Browse files Browse the repository at this point in the history
  • Loading branch information
JackSCarroll committed Oct 11, 2024
1 parent 2a23759 commit f9b0ae0
Show file tree
Hide file tree
Showing 3 changed files with 64 additions and 208 deletions.
21 changes: 3 additions & 18 deletions app/api/similarity/entities/task_similarity_entity.rb
Original file line number Diff line number Diff line change
@@ -1,9 +1,6 @@
module Similarity
module Entities
class TaskSimilarityEntity < Grape::Entity
# Tells whether the given role is one of the teaching-staff roles, by checking
# the role's id against Role.teaching_staff_ids.
# Returns nil (falsy) when no role is supplied.
def staff?(my_role)
  return if my_role.nil?

  Role.teaching_staff_ids.include?(my_role.id)
end

expose :id
expose :type
Expand All @@ -13,32 +10,20 @@ def staff?(my_role)
similarity.ready_for_viewer?
end

expose :parts do |similarity, options|
expose :parts do |similarity|
path = similarity.file_path
has_resource = path.present? && File.exist?(path)

result = [
{
idx: 0,
format: if has_resource
similarity.type == 'MossTaskSimilarity' ? 'html' : 'pdf'
similarity.type == 'JplagTaskSimilarity' ? 'html' : 'pdf'
end,
description: "#{similarity.student.name} (#{similarity.student.username}) - #{similarity.pct}%"
description: "#{similarity.other_student.name} (#{similarity.other_student.username}) - #{similarity.pct}% similarity"
}
]

# For moss similarity, show staff other student details
if similarity.type == 'MossTaskSimilarity' && staff?(options[:my_role])
other_path = similarity.other_similarity&.file_path
has_other_resource = other_path.present? && File.exist?(other_path)

result << {
idx: 1,
format: has_other_resource ? 'html' : nil,
description: "Match: #{similarity.other_student&.name} (#{similarity.other_student&.username}) - #{similarity.other_similarity&.pct}"
}
end

result
end
end
Expand Down
249 changes: 60 additions & 189 deletions app/models/similarity/unit_similarity_module.rb
Original file line number Diff line number Diff line change
Expand Up @@ -41,15 +41,15 @@ def check_similarity(force: false)
tasks = tasks_for_definition(td)
tasks_with_files = tasks.select(&:has_pdf)

# JPLAG
run_jplag_on_done_files(td, tasks_dir, tasks_with_files, unit_code)
report_path = "#{Doubtfire::Application.config.jplag_report_dir}/#{unit_code}/#{td.id}-result.zip"
warn_pct = td.plagiarism_warn_pct || 50
puts "Warn PCT: #{warn_pct}"
create_jplag_plagiarism_link(report_path, warn_pct)
process_jplag_plagiarism_report(report_path, warn_pct, td.group_set)

# Skip if not due yet
next if td.due_date > Time.zone.now
# TODO: Re-enable this after testing
# next if td.due_date > Time.zone.now

# Skip if no files changed
next unless tasks_with_files.count > 1 &&
Expand All @@ -59,48 +59,7 @@ def check_similarity(force: false)
force
)

# There are new tasks, check these

logger.debug 'Contacting MOSS for new checks'

# Create the MossRuby object
# moss_key = Doubtfire::Application.secrets.secret_key_moss
# raise "No moss key set. Check ENV['DF_SECRET_KEY_MOSS'] first." if moss_key.nil?
#
# moss = MossRuby.new(moss_key)
#
# # Set options -- the options will already have these default values
# moss.options[:max_matches] = 7
# moss.options[:directory_submission] = true
# moss.options[:show_num_matches] = 500
# moss.options[:experimental_server] = false
# moss.options[:comment] = ''
# moss.options[:language] = type_data[1]
#
# tmp_path = File.join(Dir.tmpdir, 'doubtfire', "check-#{id}-#{td.id}")
#
# begin
# # Create a file hash, with the files to be processed
# to_check = MossRuby.empty_file_hash
# add_done_files_for_plagiarism_check_of(td, tmp_path, to_check, tasks_with_files)
#
# FileUtils.chdir(tmp_path)
#
# # Get server to process files
# logger.debug 'Sending to MOSS...'
# url = moss.check(to_check, ->(_) { print '.' })
#
# logger.info "MOSS check for #{code} #{td.abbreviation} url: #{url}"
#
# td.plagiarism_report_url = url
# td.plagiarism_updated = true
# td.save
# rescue StandardError => e
# logger.error "Failed to check plagiarism for task #{td.name} (id=#{td.id}). Error: #{e.message}"
# ensure
# FileUtils.chdir(pwd)
# FileUtils.rm_rf tmp_path
# end
# There are new tasks, check these with JPLAG
end
self.last_plagarism_scan = Time.zone.now
save!
Expand All @@ -111,128 +70,26 @@ def check_similarity(force: false)
self
end

# Processes the MOSS report of every task definition flagged with
# `plagiarism_updated`, and records a plagiarism link for each reported match.
# For group tasks, every task of the first group's submission is linked with
# every task of the second group's submission.
#
# Matches whose task id cannot be extracted from the filename, or whose task no
# longer exists, are logged and skipped rather than aborting the whole run.
#
# Raises RuntimeError when no MOSS key is configured.
# Returns self.
def update_plagiarism_stats
  moss_key = Doubtfire::Application.secrets.secret_key_moss
  raise "No moss key set. Check ENV['DF_SECRET_KEY_MOSS'] first." if moss_key.nil?

  moss = MossRuby.new(moss_key)

  # The task id is the last directory component of the matched filename ("…/<id>/").
  task_id_pattern = %r{.*/(\d+)/$}

  task_definitions.where(plagiarism_updated: true).find_each do |td|
    # NOTE(review): the flag is cleared before the results are fetched, so a
    # failure further down leaves this definition unflagged — confirm intended.
    td.plagiarism_updated = false
    td.save

    # Get results
    url = td.plagiarism_report_url
    logger.debug "Processing MOSS results #{url}"

    warn_pct = td.plagiarism_warn_pct || 50

    results = moss.extract_results(url, warn_pct, ->(line) { puts line })

    # Use results
    results.each do |match|
      # String#[] with a capture index yields nil (instead of raising) when the
      # filename does not match the expected layout.
      task_id1 = match[0][:filename][task_id_pattern, 1]
      task_id2 = match[1][:filename][task_id_pattern, 1]

      # find_by returns nil for a missing record, so the guard below is reachable;
      # `find` would raise before the guard could log and skip.
      t1 = Task.find_by(id: task_id1)
      t2 = Task.find_by(id: task_id2)

      if t1.nil? || t2.nil?
        logger.error "Could not find tasks #{task_id1} or #{task_id2} for plagiarism stats check!"
        next
      end

      if td.group_set # it's a group task
        g1_tasks = t1.group_submission.tasks
        g2_tasks = t2.group_submission.tasks

        g1_tasks.each do |gt1|
          g2_tasks.each do |gt2|
            create_plagiarism_link(gt1, gt2, match, warn_pct)
          end
        end
      else # just link the individuals...
        create_plagiarism_link(t1, t2, match, warn_pct)
      end
    end
  end

  self.last_plagarism_scan = Time.zone.now
  save!

  self
end

private

# def create_plagiarism_link(task1, task2, match, warn_pct)
# plk1 = MossTaskSimilarity.where(task_id: task1.id, other_task_id: task2.id).first
# plk2 = MossTaskSimilarity.where(task_id: task2.id, other_task_id: task1.id).first
#
# if plk1.nil? || plk2.nil?
# # Delete old links between tasks
# plk1&.destroy ## will delete its pair
# plk2&.destroy
#
# plk1 = MossTaskSimilarity.create do |plm|
# plm.task = task1
# plm.other_task = task2
# plm.pct = match[0][:pct]
# plm.flagged = plm.pct >= warn_pct
# end
#
# plk2 = MossTaskSimilarity.create do |plm|
# plm.task = task2
# plm.other_task = task1
# plm.pct = match[1][:pct]
# plm.flagged = plm.pct >= warn_pct
# Extract all done files related to a task definition matching a pattern into a given directory.
# Returns an array of files
# def add_done_files_for_plagiarism_check_of(task_definition, tmp_path, tasks_with_files)
# # get each code file for each task
# task_definition.upload_requirements.each_with_index do |upreq, idx|
# # only check code files marked for similarity checks
# next unless upreq['type'] == 'code' && upreq['tii_check']
#
# pattern = task_definition.glob_for_upload_requirement(idx)
#
# tasks_with_files.each do |t|
# t.extract_file_from_done(tmp_path, pattern, ->(_task, to_path, name) { File.join(to_path.to_s, t.student.username.to_s, name.to_s) })
# end
# else
# # puts "#{plk1.pct} != #{match[0][:pct]}, #{plk1.pct != match[0][:pct]}"
#
# # Flag is larger than warn pct and larger than previous pct
# plk1.flagged = match[0][:pct] >= warn_pct && match[0][:pct] >= plk1.pct
# plk2.flagged = match[1][:pct] >= warn_pct && match[1][:pct] >= plk2.pct
#
# plk1.pct = match[0][:pct]
# plk2.pct = match[1][:pct]
# end
#
# plk1.plagiarism_report_url = match[0][:url]
# plk2.plagiarism_report_url = match[1][:url]
#
# plk1.save!
# plk2.save!
#
# FileHelper.save_plagiarism_html(plk1, match[0][:html])
# FileHelper.save_plagiarism_html(plk2, match[1][:html])
#
# self
# end

#
# For every upload requirement of the task definition that is a code file marked
# for similarity checking, extracts the matching "done" files of each given task
# into tmp_path (under a per-student username folder) and registers the
# requirement's glob with MOSS via to_check.
# Returns self.
#
def add_done_files_for_plagiarism_check_of(task_definition, tmp_path, to_check, tasks_with_files)
  task_definition.upload_requirements.each_with_index do |requirement, index|
    # Only code files flagged for similarity checks are of interest
    if requirement['type'] == 'code' && requirement['tii_check']
      glob = task_definition.glob_for_upload_requirement(index)

      tasks_with_files.each do |task|
        # Destination path: <target dir>/<student username>/<file name>
        destination_for = lambda do |_task, to_path, name|
          File.join(to_path.to_s, task.student.username.to_s, name.to_s)
        end
        task.extract_file_from_done(tmp_path, glob, destination_for)
      end

      # Register the extracted files for this pattern with MOSS
      MossRuby.add_file(to_check, "**/#{glob}")
    end
  end

  self
end

# JPLAG Function - extracts "done" files for each task and packages them into a directory for JPLAG to run on
def run_jplag_on_done_files(task_definition, tasks_dir, tasks_with_files, unit_code)
similarity_pct = task_definition.plagiarism_warn_pct
Expand Down Expand Up @@ -274,7 +131,7 @@ def run_jplag_on_done_files(task_definition, tasks_dir, tasks_with_files, unit_c
self
end

def create_jplag_plagiarism_link(path, warn_pct)
def process_jplag_plagiarism_report(path, warn_pct, is_group)
# Extract overview json from report zip
Zip::File.open(path) do |zip_file|
overview_entry = zip_file.find_entry('overview.json')
Expand Down Expand Up @@ -314,35 +171,18 @@ def create_jplag_plagiarism_link(path, warn_pct)
next
end

# Create a new plagiarism link between the two tasks
plk1 = JplagTaskSimilarity.where(task_id: task1_id, other_task_id: task2_id).first
plk2 = JplagTaskSimilarity.where(task_id: task2_id, other_task_id: task1_id).first
if plk1.nil? || plk2.nil?
# Delete old links between tasks
plk1&.destroy ## will delete its pair
plk2&.destroy
plk1 = JplagTaskSimilarity.create do |plm|
plm.task = first_submission
plm.other_task = second_submission
plm.pct = comparison[:max_similarity]
plm.flagged = plm.pct >= warn_pct
if is_group # its a group task
g1_tasks = first_submission.group_submission.tasks
g2_tasks = second_submission.group_submission.tasks
g1_tasks.each do |gt1|
g2_tasks.each do |gt2|
next if gt1.student == gt2.student
create_plagiarism_link(gt1, gt2, warn_pct, comparison[:max_similarity])
end
end
plk2 = JplagTaskSimilarity.create do |plm|
plm.task = second_submission
plm.other_task = first_submission
plm.pct = comparison[:max_similarity]
plm.flagged = plm.pct >= warn_pct
end
else
# puts "#{plk1.pct} != #{match[0][:pct]}, #{plk1.pct != match[0][:pct]}"
# Flag is larger than warn pct and larger than previous pct
plk1.flagged = comparison[:max_similarity] >= warn_pct && comparison[:max_similarity] >= plk1.pct
plk2.flagged = comparison[:max_similarity] >= warn_pct && comparison[:max_similarity] >= plk2.pct
plk1.pct = comparison[:max_similarity]
plk2.pct = comparison[:max_similarity]
else # just link the individuals...
create_plagiarism_link(first_submission, second_submission, warn_pct, comparison[:max_similarity])
end
plk1.save!
plk2.save!
end
else
puts 'overview.json not found in the zip file'
Expand All @@ -351,4 +191,35 @@ def create_jplag_plagiarism_link(path, warn_pct)
self
end
end

# Records the similarity between two tasks as a pair of mirrored
# JplagTaskSimilarity links (task1 -> task2 and task2 -> task1).
#
# When both links already exist they are updated in place: each is flagged only
# if the new similarity reaches warn_pct and is not lower than the link's
# previous pct. Otherwise any leftover link is destroyed and a fresh pair is
# created, flagged when the similarity reaches warn_pct.
#
# Both links are saved with save!; the result of the last save! is returned.
def create_plagiarism_link(task1, task2, warn_pct, max_similarity)
  forward = JplagTaskSimilarity.where(task_id: task1.id, other_task_id: task2.id).first
  reverse = JplagTaskSimilarity.where(task_id: task2.id, other_task_id: task1.id).first

  if forward && reverse
    existing = [forward, reverse]
    # Decide the flags against the previous percentages before overwriting them
    existing.each do |link|
      link.flagged = max_similarity >= warn_pct && max_similarity >= link.pct
    end
    existing.each { |link| link.pct = max_similarity }
  else
    # Clear out a half-present pair before rebuilding it
    # (per the original note, destroying one link is expected to remove its pair — confirm)
    forward&.destroy
    reverse&.destroy

    build_link = lambda do |from_task, to_task|
      JplagTaskSimilarity.create do |link|
        link.task = from_task
        link.other_task = to_task
        link.pct = max_similarity
        link.flagged = link.pct >= warn_pct
      end
    end

    forward = build_link.call(task1, task2)
    reverse = build_link.call(task2, task1)
  end

  forward.save!
  reverse.save!
end
end
2 changes: 1 addition & 1 deletion app/models/task.rb
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,7 @@ def specific_permission_hash(role, perm_hash, _other)

has_many :comments, class_name: 'TaskComment', dependent: :destroy, inverse_of: :task
has_many :task_similarities, class_name: 'TaskSimilarity', dependent: :destroy, inverse_of: :task
has_many :reverse_task_similarities, class_name: 'MossTaskSimilarity', dependent: :destroy, inverse_of: :other_task, foreign_key: 'other_task_id'
has_many :reverse_task_similarities, class_name: 'JplagTaskSimilarity', dependent: :destroy, inverse_of: :other_task, foreign_key: 'other_task_id'
has_many :learning_outcome_task_links, dependent: :destroy # links to learning outcomes
has_many :learning_outcomes, through: :learning_outcome_task_links
has_many :task_engagements, dependent: :destroy
Expand Down

0 comments on commit f9b0ae0

Please sign in to comment.