From 020da791c2050d6207cb7d0afc90f6c85ee0de42 Mon Sep 17 00:00:00 2001
From: Dmitry Reshetnik
Date: Tue, 26 Nov 2024 18:26:39 -0500
Subject: [PATCH] add parameters

---
 src/rev_ai/apiclient.py | 67 +++++++++++++++++++++++++++++++++++------
 1 file changed, 57 insertions(+), 10 deletions(-)

diff --git a/src/rev_ai/apiclient.py b/src/rev_ai/apiclient.py
index 830d6e4..ede8c6f 100644
--- a/src/rev_ai/apiclient.py
+++ b/src/rev_ai/apiclient.py
@@ -337,28 +337,36 @@ def get_list_of_jobs(self, limit=None, starting_after=None):
 
         return [Job.from_json(job) for job in response.json()]
 
-    def get_transcript_text(self, id_):
+    def get_transcript_text(self, id_, group_channels_by=None, group_channels_threshold_ms=None):
         """Get the transcript of a specific job as plain text.
 
         :param id_: id of job to be requested
+        :param group_channels_by: optional, group channels by speaker or time
+        :param group_channels_threshold_ms: optional, group channels by time threshold in milliseconds
         :returns: transcript data as text
         :raises: HTTPError
         """
         if not id_:
             raise ValueError('id_ must be provided')
+
+        url = self._build_transcript_url(id_,
+            group_channels_by=group_channels_by,
+            group_channels_threshold_ms=group_channels_threshold_ms)
 
         response = self._make_http_request(
             "GET",
-            urljoin(self.base_url, 'jobs/{}/transcript'.format(id_)),
+            url,
             headers={'Accept': 'text/plain'}
         )
 
         return response.text
 
-    def get_transcript_text_as_stream(self, id_):
+    def get_transcript_text_as_stream(self, id_, group_channels_by=None, group_channels_threshold_ms=None):
         """Get the transcript of a specific job as a plain text stream.
 
         :param id_: id of job to be requested
+        :param group_channels_by: optional, group channels by speaker or time
+        :param group_channels_threshold_ms: optional, group channels by time threshold in milliseconds
         :returns: requests.models.Response HTTP response which can be used to stream
             the payload of the response
         :raises: HTTPError
@@ -366,37 +374,49 @@ def get_transcript_text_as_stream(self, id_):
         if not id_:
             raise ValueError('id_ must be provided')
 
+        url = self._build_transcript_url(id_,
+            group_channels_by=group_channels_by,
+            group_channels_threshold_ms=group_channels_threshold_ms)
+
         response = self._make_http_request(
             "GET",
-            urljoin(self.base_url, 'jobs/{}/transcript'.format(id_)),
+            url,
             headers={'Accept': 'text/plain'},
             stream=True
         )
 
         return response
 
-    def get_transcript_json(self, id_):
+    def get_transcript_json(self, id_, group_channels_by=None, group_channels_threshold_ms=None):
         """Get the transcript of a specific job as json.
 
         :param id_: id of job to be requested
+        :param group_channels_by: optional, group channels by speaker or time
+        :param group_channels_threshold_ms: optional, group channels by time threshold in milliseconds
         :returns: transcript data as json
         :raises: HTTPError
         """
         if not id_:
             raise ValueError('id_ must be provided')
 
+        url = self._build_transcript_url(id_,
+            group_channels_by=group_channels_by,
+            group_channels_threshold_ms=group_channels_threshold_ms)
+
         response = self._make_http_request(
             "GET",
-            urljoin(self.base_url, 'jobs/{}/transcript'.format(id_)),
+            url,
             headers={'Accept': self.rev_json_content_type}
         )
 
         return response.json()
 
-    def get_transcript_json_as_stream(self, id_):
+    def get_transcript_json_as_stream(self, id_, group_channels_by=None, group_channels_threshold_ms=None):
         """Get the transcript of a specific job as streamed json.
 
         :param id_: id of job to be requested
+        :param group_channels_by: optional, group channels by speaker or time
+        :param group_channels_threshold_ms: optional, group channels by time threshold in milliseconds
         :returns: requests.models.Response HTTP response which can be used to stream
             the payload of the response
         :raises: HTTPError
@@ -404,28 +424,38 @@ def get_transcript_json_as_stream(self, id_):
         if not id_:
             raise ValueError('id_ must be provided')
 
+        url = self._build_transcript_url(id_,
+            group_channels_by=group_channels_by,
+            group_channels_threshold_ms=group_channels_threshold_ms)
+
         response = self._make_http_request(
             "GET",
-            urljoin(self.base_url, 'jobs/{}/transcript'.format(id_)),
+            url,
             headers={'Accept': self.rev_json_content_type},
             stream=True
         )
 
         return response
 
-    def get_transcript_object(self, id_):
+    def get_transcript_object(self, id_, group_channels_by=None, group_channels_threshold_ms=None):
         """Get the transcript of a specific job as a python object`.
 
         :param id_: id of job to be requested
+        :param group_channels_by: optional, group channels by speaker or time
+        :param group_channels_threshold_ms: optional, group channels by time threshold in milliseconds
         :returns: transcript data as a python object
         :raises: HTTPError
         """
         if not id_:
             raise ValueError('id_ must be provided')
 
+        url = self._build_transcript_url(id_,
+            group_channels_by=group_channels_by,
+            group_channels_threshold_ms=group_channels_threshold_ms)
+
         response = self._make_http_request(
             "GET",
-            urljoin(self.base_url, 'jobs/{}/transcript'.format(id_)),
+            url,
             headers={'Accept': self.rev_json_content_type}
         )
 
@@ -814,3 +844,20 @@ def _create_job_options_payload(
 
     def _create_captions_query(self, speaker_channel):
         return '' if speaker_channel is None else '?speaker_channel={}'.format(speaker_channel)
+
+    def _build_transcript_url(self, id_, group_channels_by=None, group_channels_threshold_ms=None):
+        """Build the get transcript url.
+
+        :param id_: id of job to be requested
+        :param group_channels_by: optional, group channels by speaker or time
+        :param group_channels_threshold_ms: optional, group channels by time threshold in milliseconds
+        :returns: transcript url; a query string is appended only when an option is set
+        """
+        params = []
+        if group_channels_by is not None:
+            params.append('group_channels_by={}'.format(group_channels_by))
+        if group_channels_threshold_ms is not None:
+            params.append('group_channels_threshold_ms={}'.format(group_channels_threshold_ms))
+
+        query = '?{}'.format('&'.join(params)) if params else ''
+        return urljoin(self.base_url, 'jobs/{}/transcript{}'.format(id_, query))