Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Arc: Reading file metadata (file header appendix) #12

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 9 additions & 1 deletion warc/arc.py
Original file line number Diff line number Diff line change
Expand Up @@ -259,6 +259,7 @@ def __init__(self, filename=None, mode=None, fileobj=None, version = None, file_
self.file_headers = file_headers
self.header_written = False
self.header_read = False
self.file_meta = ''


def _write_header(self):
Expand Down Expand Up @@ -311,7 +312,6 @@ def _read_file_header(self):
payload1 = self.fileobj.readline()
payload2 = self.fileobj.readline()
version, reserved, organisation = payload1.split(None, 2)
self.fileobj.readline() # Lose the separator newline
self.header_read = True
# print "--------------------------------------------------"
# print header,"\n", payload1, "\n", payload2,"\n"
Expand All @@ -334,6 +334,14 @@ def _read_file_header(self):
else:
raise IOError("Unknown ARC version '%s'"%version)

current = len(payload1) + len(payload2)
self.file_meta = ''
while current < int(length):
line = self.fileobj.readline()
current = current + len(line)
self.file_meta = self.file_meta + line
self.fileobj.readline() # Lose the separator newline

def _read_arc_record(self):
"Reads out an arc record, formats it and returns it"
#XXX:Noufal Stream payload here rather than just read it
Expand Down