Skip to content

Commit

Permalink
REVAI-4324: Multichannel transcript grouping
Browse files Browse the repository at this point in the history
  • Loading branch information
dmtrrk authored Nov 27, 2024
1 parent 172ffac commit e36130e
Show file tree
Hide file tree
Showing 6 changed files with 190 additions and 31 deletions.
10 changes: 5 additions & 5 deletions src/rev_ai/__init__.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
# -*- coding: utf-8 -*-
"""Top-level package for rev_ai"""

__version__ = '2.20.0'
__version__ = '2.21.0'

from .models import Job, JobStatus, Account, Transcript, Monologue, Element, MediaConfig, \
CaptionType, CustomVocabulary, TopicExtractionJob, TopicExtractionResult, Topic, Informant, \
SpeakerName, LanguageIdentificationJob, LanguageIdentificationResult, LanguageConfidence, \
SentimentAnalysisResult, SentimentValue, SentimentMessage, SentimentAnalysisJob, \
CustomerUrlData, RevAiApiDeploymentConfigMap, RevAiApiDeployment
CaptionType, GroupChannelsType, CustomVocabulary, TopicExtractionJob, TopicExtractionResult, \
Topic, Informant, SpeakerName, LanguageIdentificationJob, LanguageIdentificationResult, \
LanguageConfidence, SentimentAnalysisResult, SentimentValue, SentimentMessage, \
SentimentAnalysisJob, CustomerUrlData, RevAiApiDeploymentConfigMap, RevAiApiDeployment
98 changes: 88 additions & 10 deletions src/rev_ai/apiclient.py
Original file line number Diff line number Diff line change
Expand Up @@ -337,95 +337,154 @@ def get_list_of_jobs(self, limit=None, starting_after=None):

return [Job.from_json(job) for job in response.json()]

def get_transcript_text(self, id_):
def get_transcript_text(self, id_, group_channels_by=None, group_channels_threshold_ms=None):
"""Get the transcript of a specific job as plain text.
:param id_: id of job to be requested
:param group_channels_by: optional, GroupChannelsType grouping strategy for
multichannel transcripts. None for default.
:param group_channels_threshold_ms: optional, grouping threshold in milliseconds.
None for default.
:returns: transcript data as text
:raises: HTTPError
"""
if not id_:
raise ValueError('id_ must be provided')

url = self._build_transcript_url(
id_,
group_channels_by=group_channels_by,
group_channels_threshold_ms=group_channels_threshold_ms
)

response = self._make_http_request(
"GET",
urljoin(self.base_url, 'jobs/{}/transcript'.format(id_)),
url,
headers={'Accept': 'text/plain'}
)

return response.text

def get_transcript_text_as_stream(self, id_):
def get_transcript_text_as_stream(self,
id_,
group_channels_by=None,
group_channels_threshold_ms=None):
"""Get the transcript of a specific job as a plain text stream.
:param id_: id of job to be requested
:param group_channels_by: optional, GroupChannelsType grouping strategy for
multichannel transcripts. None for default.
:param group_channels_threshold_ms: optional, grouping threshold in milliseconds.
None for default.
:returns: requests.models.Response HTTP response which can be used to stream
the payload of the response
:raises: HTTPError
"""
if not id_:
raise ValueError('id_ must be provided')

url = self._build_transcript_url(
id_,
group_channels_by=group_channels_by,
group_channels_threshold_ms=group_channels_threshold_ms
)

response = self._make_http_request(
"GET",
urljoin(self.base_url, 'jobs/{}/transcript'.format(id_)),
url,
headers={'Accept': 'text/plain'},
stream=True
)

return response

def get_transcript_json(self, id_):
def get_transcript_json(self,
id_,
group_channels_by=None,
group_channels_threshold_ms=None):
"""Get the transcript of a specific job as json.
:param id_: id of job to be requested
:param group_channels_by: optional, GroupChannelsType grouping strategy for
multichannel transcripts. None for default.
:param group_channels_threshold_ms: optional, grouping threshold in milliseconds.
None for default.
:returns: transcript data as json
:raises: HTTPError
"""
if not id_:
raise ValueError('id_ must be provided')

url = self._build_transcript_url(
id_,
group_channels_by=group_channels_by,
group_channels_threshold_ms=group_channels_threshold_ms
)

response = self._make_http_request(
"GET",
urljoin(self.base_url, 'jobs/{}/transcript'.format(id_)),
url,
headers={'Accept': self.rev_json_content_type}
)

return response.json()

def get_transcript_json_as_stream(self, id_):
def get_transcript_json_as_stream(self,
id_,
group_channels_by=None,
group_channels_threshold_ms=None):
"""Get the transcript of a specific job as streamed json.
:param id_: id of job to be requested
:param group_channels_by: optional, GroupChannelsType grouping strategy for
multichannel transcripts. None for default.
:param group_channels_threshold_ms: optional, grouping threshold in milliseconds.
None for default.
:returns: requests.models.Response HTTP response which can be used to stream
the payload of the response
:raises: HTTPError
"""
if not id_:
raise ValueError('id_ must be provided')

url = self._build_transcript_url(
id_,
group_channels_by=group_channels_by,
group_channels_threshold_ms=group_channels_threshold_ms
)

response = self._make_http_request(
"GET",
urljoin(self.base_url, 'jobs/{}/transcript'.format(id_)),
url,
headers={'Accept': self.rev_json_content_type},
stream=True
)

return response

def get_transcript_object(self, id_):
def get_transcript_object(self, id_, group_channels_by=None, group_channels_threshold_ms=None):
"""Get the transcript of a specific job as a python object`.
:param id_: id of job to be requested
:param group_channels_by: optional, GroupChannelsType grouping strategy for
multichannel transcripts. None for default.
:param group_channels_threshold_ms: optional, grouping threshold in milliseconds.
None for default.
:returns: transcript data as a python object
:raises: HTTPError
"""
if not id_:
raise ValueError('id_ must be provided')

url = self._build_transcript_url(
id_,
group_channels_by=group_channels_by,
group_channels_threshold_ms=group_channels_threshold_ms
)

response = self._make_http_request(
"GET",
urljoin(self.base_url, 'jobs/{}/transcript'.format(id_)),
url,
headers={'Accept': self.rev_json_content_type}
)

Expand Down Expand Up @@ -814,3 +873,22 @@ def _create_job_options_payload(

def _create_captions_query(self, speaker_channel):
return '' if speaker_channel is None else '?speaker_channel={}'.format(speaker_channel)

def _build_transcript_url(self, id_, group_channels_by=None, group_channels_threshold_ms=None):
"""Build the get transcript url.
:param id_: id of job to be requested
:param group_channels_by: optional, GroupChannelsType grouping strategy for
multichannel transcripts. None for default.
:param group_channels_threshold_ms: optional, grouping threshold in milliseconds.
None for default.
:returns: url for getting the transcript
"""
params = []
if group_channels_by is not None:
params.append('group_channels_by={}'.format(group_channels_by))
if group_channels_threshold_ms is not None:
params.append('group_channels_threshold_ms={}'.format(group_channels_threshold_ms))

query = '?{}'.format('&'.join(params))
return urljoin(self.base_url, 'jobs/{}/transcript{}'.format(id_, query))
2 changes: 1 addition & 1 deletion src/rev_ai/models/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from .customvocabulary import CustomVocabulary
from .streaming import MediaConfig
from .asynchronous import Job, JobStatus, Account, Transcript, Monologue, Element, CaptionType, \
SpeakerName
SpeakerName, GroupChannelsType
from .insights import TopicExtractionJob, TopicExtractionResult, Topic, Informant, \
SentimentAnalysisResult, SentimentValue, SentimentMessage, SentimentAnalysisJob
from .language_id import LanguageIdentificationJob, LanguageIdentificationResult, LanguageConfidence
Expand Down
1 change: 1 addition & 0 deletions src/rev_ai/models/asynchronous/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,4 @@
from .account import Account
from .transcript import Transcript, Monologue, Element
from .speaker_name import SpeakerName
from .group_channels_type import GroupChannelsType
14 changes: 14 additions & 0 deletions src/rev_ai/models/asynchronous/group_channels_type.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# -*- coding: utf-8 -*-
"""Enum for group_channels_by types"""

from enum import Enum


class GroupChannelsType(str, Enum):
    """Values accepted for the group_channels_by option."""

    SPEAKER = 'speaker'
    SENTENCE = 'sentence'
    WORD = 'word'

    @classmethod
    def from_string(cls, status):
        """Look up a member by its name, ignoring letter case.

        :param status: member name such as 'speaker', in any letter case
        :returns: the matching GroupChannelsType member
        :raises: KeyError if no member has that name
        """
        member_name = status.upper()
        return cls.__members__[member_name]
Loading

0 comments on commit e36130e

Please sign in to comment.