Skip to content

Commit

Permalink
Base moodle's file_exist method on the file content instead of headers
Browse files Browse the repository at this point in the history
In moodle we can't make a API call to check if a file exists and we have
to rely on the URL where file will be downloaded.

When a problem occurs we get a 200 response with  a JSON document
with the reason of the error.

We don't want to always download the document as it will actually
download the full PDF file for success cases, the most common case.

Until now we been relying on the headers of a HEAD HTTP request,
interpreting JSON responses as the file being missing.

We have found at least one school that doesn't include the content-type
header on the response so he are switch the approach to inspect the
first bytes of the response and check if we are getting a PDF back from
Moodle.
  • Loading branch information
marcospri committed Nov 6, 2024
1 parent 517c3c6 commit eb21f29
Show file tree
Hide file tree
Showing 2 changed files with 25 additions and 17 deletions.
19 changes: 11 additions & 8 deletions lms/services/moodle.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,14 +137,17 @@ def list_files(self, course_id: int):
def file_exists(self, file_id) -> bool:
"""Check if the file exists in the course."""
# Moodle file IDs are URLs, but they need the token to be accessible
response = self._http.request("HEAD", f"{file_id}&token={self.token}")
# Moodle API doesn't use status codes, we can't rely on that.
# We don't want to download the full file so we'll do a HEAD request and assume:
# - JSON response, it's an error response
# - Anything else, it's the file we are after

LOG.info("Headers from Moodle file check %s", response.headers)
return not response.headers["content-type"].startswith("application/json")
response = self._http.request(
"GET", f"{file_id}&token={self.token}", headers={"Range": "bytes=0-1024"}
)
# API doesn't use status codes, we can't rely on that.
if content_type := response.headers.get("content-type"):
# JSON response, assume it's an error response
return not content_type.startswith("application/json")

# If the Moodle server doesn't return the content-type header, check the first bytes of the response
# and check if the files is indeed a PDF.
return b"%PDF" in response.content

def page(self, course_id, page_id) -> dict | None:
url = self._api_url(Function.GET_PAGES)
Expand Down
23 changes: 14 additions & 9 deletions tests/unit/lms/services/moodle_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -149,19 +149,24 @@ def test_list_files(self, svc, http_service, contents):
}
]

@pytest.mark.parametrize("content", [b"some other content", b"%PDF-14"])
@pytest.mark.parametrize(
"header,expected",
[
("application/json", False),
("application/pdf", True),
],
"headers",
[{}, {"content-type": "application/json"}, {"content-type": "application/pdf"}],
)
def test_file_exists(self, svc, http_service, header, expected):
http_service.request.return_value = Mock(headers={"content-type": header})
def test_file_exists(self, svc, http_service, headers, content):
http_service.request.return_value = Mock(content=content, headers=headers)

assert svc.file_exists("URL") == expected
expected = (
not headers.get("content-type", "").startswith("application/json")
if headers.get("content-type")
else b"%PDF" in content
)

http_service.request.assert_called_once_with("HEAD", "URL&token=sentinel.token")
assert svc.file_exists("URL") == expected
http_service.request.assert_called_once_with(
"GET", "URL&token=sentinel.token", headers={"Range": "bytes=0-1024"}
)

def test_list_pages(self, svc, http_service, contents):
http_service.post.return_value.json.return_value = contents
Expand Down

0 comments on commit eb21f29

Please sign in to comment.