diff --git a/itemloaders/__init__.py b/itemloaders/__init__.py index 70a66de..af8cf3e 100644 --- a/itemloaders/__init__.py +++ b/itemloaders/__init__.py @@ -266,7 +266,7 @@ def load_item(self): return adapter.item - def get_output_value(self, field_name): + def get_output_value(self, field_name, default=None): """ Return the collected values parsed using the output processor, for the given field. This method doesn't populate or modify the item at all. @@ -275,7 +275,10 @@ def get_output_value(self, field_name): proc = wrap_loader_context(proc, self.context) value = self._values.get(field_name, []) try: - return proc(value) + result = proc(value) + if not result and default is not None: + return default + return result except Exception as e: raise ValueError("Error with output processor: field=%r value=%r error='%s: %s'" % (field_name, value, type(e).__name__, str(e))) diff --git a/main.py b/main.py new file mode 100644 index 0000000..db5d205 --- /dev/null +++ b/main.py @@ -0,0 +1,6 @@ +from itemloaders import ItemLoader +from itemloaders.processors import MapCompose, TakeFirst + + +class CustomItemLoader(ItemLoader): + name_out = TakeFirst() diff --git a/tests/test_base_loader.py b/tests/test_base_loader.py index 876b76e..4a4b21b 100644 --- a/tests/test_base_loader.py +++ b/tests/test_base_loader.py @@ -68,7 +68,7 @@ class MyLoader(ItemLoader): url_out = TakeFirst() def img_url_out(self, values): - return (self.get_output_value('url') or '') + values[0] + return self.get_output_value('url', '') + values[0] il = MyLoader(item={}) il.add_value('url', 'http://example.com/') diff --git a/tests/test_loader_initialization.py b/tests/test_loader_initialization.py index 7e8d51a..437e291 100644 --- a/tests/test_loader_initialization.py +++ b/tests/test_loader_initialization.py @@ -1,6 +1,7 @@ import unittest from itemloaders import ItemLoader +from itemloaders.processors import TakeFirst class InitializationTestMixin: @@ -77,6 +78,53 @@ def test_get_output_value_list(self): self.assertIsInstance(loaded_item, self.item_class) self.assertEqual(loaded_item, dict({'name': ['foo', 'bar']})) + def test_get_output_value_default_singlevalue(self): + """ + The default value should be used only when the returned value is + empty (None, '', etc.) and there is a default value defined + """ + input_item = self.item_class() + il = ItemLoader(item=input_item) + il.default_output_processor = TakeFirst() # make "name" field single + + self.assertEqual(il.get_output_value('name'), None) + self.assertEqual(il.get_output_value('name', ''), '') + self.assertEqual(il.get_output_value('name', []), []) + self.assertEqual(il.get_output_value('name', 'foo'), 'foo') + + il.add_value('name', '') + self.assertEqual(il.get_output_value('name'), None) + self.assertEqual(il.get_output_value('name', ''), '') + self.assertEqual(il.get_output_value('name', 'foo'), 'foo') + self.assertEqual(il.load_item(), {}) + + input_item2 = self.item_class() + il2 = ItemLoader(item=input_item2) + il2.default_output_processor = TakeFirst() + il2.add_value('name', 'foo') + self.assertEqual(il2.get_output_value('name'), 'foo') + self.assertEqual(il2.get_output_value('name', 'bar'), 'foo') + self.assertEqual(il2.load_item(), dict({'name': 'foo'})) + + def test_get_output_value_default_list(self): + """ + The default value should be used only when the returned value is + empty ([], etc.) and there is a default value defined + """ + input_item = self.item_class() + il = ItemLoader(item=input_item) + il.add_value('name', []) + self.assertEqual(il.get_output_value('name'), []) + self.assertEqual(il.get_output_value('name', 'foo'), 'foo') + self.assertEqual(il.load_item(), {}) + + input_item2 = self.item_class() + il2 = ItemLoader(item=input_item2) + il2.add_value('name', ['foo', 'bar']) + self.assertEqual(il2.get_output_value('name'), ['foo', 'bar']) + self.assertEqual(il2.get_output_value('name', ['spam']), ['foo', 'bar']) + self.assertEqual(il2.load_item(), dict({'name': ['foo', 'bar']})) + def test_values_single(self): """Values from initial item must be added to loader._values""" input_item = self.item_class(name='foo')