From 9139a7d8e004767b3f1d239833cbb5c0ac76f1f6 Mon Sep 17 00:00:00 2001 From: Marcos Prieto Date: Tue, 5 Nov 2024 15:35:08 +0100 Subject: [PATCH] Base moodle's file_exist method on the file content instead of headers In moodle we can't make a API call to check if a file exists and we have to rely on the URL where file will be downloaded. When a problem occurs we get a 200 response with a JSON document with the reason of the error. We don't want to always download the document as it will actually download the full PDF file for success cases, the most common case. Until now we been relying on the headers of a HEAD HTTP request, interpreting JSON responses as the file being missing. We have found at least one school that doesn't include the content-type header on the response so he are switch the approach to inspect the first bytes of the response and check if we are getting a PDF back from Moodle. --- lms/services/moodle.py | 19 +++++++++++-------- tests/unit/lms/services/moodle_test.py | 23 ++++++++++++++--------- 2 files changed, 25 insertions(+), 17 deletions(-) diff --git a/lms/services/moodle.py b/lms/services/moodle.py index 89cf551eb0..f2c672374e 100644 --- a/lms/services/moodle.py +++ b/lms/services/moodle.py @@ -137,14 +137,17 @@ def list_files(self, course_id: int): def file_exists(self, file_id) -> bool: """Check if the file exists in the course.""" # Moodle file IDs are URLs, but they need the token to be accessible - response = self._http.request("HEAD", f"{file_id}&token={self.token}") - # Moodle API doesn't use status codes, we can't rely on that. - # We don't want to download the full file so we'll do a HEAD request and assume: - # - JSON response, it's an error response - # - Anything else, it's the file we are after - - LOG.info("Headers from Moodle file check %s", response.headers) - return not response.headers["content-type"].startswith("application/json") + response = self._http.request( + "GET", f"{file_id}&token={self.token}", headers={"Range": "bytes=0-1024"} + ) + # API doesn't use status codes, we can't rely on that. + if content_type := response.headers.get("content-type"): + # JSON response, assume it's an error response + return not content_type.startswith("application/json") + + # If the Moodle server doesn't return the content-type header, check the first bytes of the response + # and check if the files is indeed a PDF. + return b"%PDF" in response.content def page(self, course_id, page_id) -> dict | None: url = self._api_url(Function.GET_PAGES) diff --git a/tests/unit/lms/services/moodle_test.py b/tests/unit/lms/services/moodle_test.py index 1de2d51f7b..30d3550079 100644 --- a/tests/unit/lms/services/moodle_test.py +++ b/tests/unit/lms/services/moodle_test.py @@ -149,19 +149,24 @@ def test_list_files(self, svc, http_service, contents): } ] + @pytest.mark.parametrize("content", [b"some other content", b"%PDF-14"]) @pytest.mark.parametrize( - "header,expected", - [ - ("application/json", False), - ("application/pdf", True), - ], + "headers", + [{}, {"content-type": "application/json"}, {"content-type": "application/pdf"}], ) - def test_file_exists(self, svc, http_service, header, expected): - http_service.request.return_value = Mock(headers={"content-type": header}) + def test_file_exists(self, svc, http_service, headers, content): + http_service.request.return_value = Mock(content=content, headers=headers) - assert svc.file_exists("URL") == expected + expected = ( + not headers.get("content-type", "").startswith("application/json") + if headers.get("content-type") + else b"%PDF" in content + ) - http_service.request.assert_called_once_with("HEAD", "URL&token=sentinel.token") + assert svc.file_exists("URL") == expected + http_service.request.assert_called_once_with( + "GET", "URL&token=sentinel.token", headers={"Range": "bytes=0-1024"} + ) def test_list_pages(self, svc, http_service, contents): http_service.post.return_value.json.return_value = contents