Commit
Merge pull request #5 from FindHotel/s3-upload
DIN-34 Add S3 transport
velppa authored Dec 14, 2018
2 parents 8382a5f + 02b37d9 commit 33090b2
Showing 11 changed files with 309 additions and 28 deletions.
3 changes: 2 additions & 1 deletion .gitignore
@@ -6,4 +6,5 @@ dist
MANIFEST
build
.eggs
.env
.pytest_cache
6 changes: 4 additions & 2 deletions .travis.yml
@@ -1,8 +1,10 @@
language: python
sudo: required
dist: xenial
python:
- "3.3"
- "3.4"
- "3.5"
- "3.6"
- "3.7"
install:
- "pip install ."
script: make test
5 changes: 5 additions & 0 deletions HISTORY.md
@@ -1,3 +1,8 @@
1.5.0 / 2018-12-14
==================

* Add S3 transport to upload files directly to S3.

1.3.1 / 2018-01-06
==================

71 changes: 65 additions & 6 deletions README.md
@@ -1,5 +1,4 @@
# analytics-python

[![Build Status](https://travis-ci.org/FindHotel/analytics-python.svg?branch=master)](https://travis-ci.org/FindHotel/analytics-python)

@@ -9,27 +8,87 @@ analytics-python is a python client that is a slightly modified version of [Segment's

## Usage

You can use the package directly; in this case the default `http` transport will be used:

```python
import analytics

# This key will be passed in the `x-api-key` header of every request
analytics.write_key = 'AWS_API_GATEWAY_KEY'

# The custom endpoint the events will be delivered to
analytics.endpoint = 'https://segment.fih.io/v1/[endpoint-key]'

analytics.track('kljsdgs99', 'SignedUp', {'plan': 'Enterprise'})
analytics.flush()
```

Use the client with a custom error handling function:

```python
import analytics

ANALYTICS_WRITE_KEY = 'AWS_API_GATEWAY_KEY'
ANALYTICS_ENDPOINT = 'https://segment.fih.io/v1/[endpoint-key]'

def log_error(e, batch):
    print("exception: {}, batch: {}".format(e, batch), flush=True)

client = analytics.Client(
    endpoint=ANALYTICS_ENDPOINT,
    write_key=ANALYTICS_WRITE_KEY,
    debug=analytics.debug,
    on_error=log_error,
    send=analytics.send,
    max_queue_size=analytics.max_queue_size,
    upload_size=analytics.upload_size
)

client.track(...)
client.flush()
```

### Using S3 transport

When using the `s3` transport, the SDK uploads data directly to AWS S3, bypassing the HTTP interface.

```python
import uuid

from analytics import Client

MB = 1024*1024

c = Client(
    write_key="write-key",
    endpoint="https://segment.fih.io/v1/[endpoint-key]",
    upload_size=1*MB,
    transport='s3',
    max_queue_size=1000000,
)

for i in range(30000):
    c.track(
        user_id='pavel',
        event='UUIDGenerated',
        properties=dict(id=str(uuid.uuid4()), counter=i)
    )
    if i % 10000 == 0:
        c.flush()

c.flush()
```
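
Since this PR also adds `transport` as a module-level setting in `analytics/__init__.py` (see the diff below), the module-level style from the first example should work the same way. A sketch, not taken from this diff:

```python
import analytics

analytics.write_key = 'AWS_API_GATEWAY_KEY'
analytics.endpoint = 'https://segment.fih.io/v1/[endpoint-key]'
analytics.transport = 's3'  # assumed: forwarded to the default client like the other settings

analytics.track('kljsdgs99', 'SignedUp', {'plan': 'Enterprise'})
analytics.flush()
```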

## More information

The documentation for Segment's Python SDK that this repository is based on is available at [https://segment.com/libraries/python](https://segment.com/libraries/python). You can use Segment's docs to get familiar with the API.

## License

```txt
WWWWWW||WWWWWW
W W W||W W W
||
3 changes: 2 additions & 1 deletion analytics/__init__.py
@@ -7,6 +7,7 @@
"""Settings."""
write_key = None
endpoint = 'https://api.segment.io/v1/batch'
transport = 'http'
max_queue_size = 10000
upload_size = 100
on_error = None
@@ -55,7 +56,7 @@ def _proxy(method, *args, **kwargs):
        default_client = Client(write_key, debug=debug, on_error=on_error,
                                send=send, endpoint=endpoint,
                                max_queue_size=max_queue_size,
                                upload_size=upload_size, transport=transport)

    fn = getattr(default_client, method)
    fn(*args, **kwargs)
26 changes: 21 additions & 5 deletions analytics/client.py
@@ -10,6 +10,7 @@

from analytics.utils import guess_timezone, clean
from analytics.consumer import Consumer
from analytics.s3_consumer import S3Consumer
from analytics.version import VERSION

try:
@@ -22,22 +23,37 @@


class Client(object):
    """Create a new Segment client.

    upload_size has a different meaning depending on the chosen transport.
    For the http transport, upload_size is the number of items batched into
    a single POST request to the backend.
    For the s3 transport, upload_size is the size in bytes of an
    _uncompressed_ partition of the data. A sane value is between 10 and
    100 MB, depending on the compressibility of the underlying data.
    """
    log = logging.getLogger('segment')

    def __init__(self, write_key=None, debug=False, max_queue_size=10000,
                 send=True, on_error=None, endpoint=None, upload_size=100,
                 transport='http'):
        require('write_key', write_key, string_types)

        self.queue = queue.Queue(max_queue_size)
        self.write_key = write_key
        self.endpoint = endpoint
        self.on_error = on_error
        self.debug = debug
        self.send = send

        if transport == 'http':
            self.consumer = Consumer(self.queue, write_key, endpoint=endpoint,
                                     on_error=on_error, upload_size=upload_size)
        elif transport == 's3':
            self.consumer = S3Consumer(self.queue, write_key, endpoint=endpoint,
                                       on_error=on_error, upload_size=upload_size)
        else:
            raise ValueError("transport should be either http or s3")

        if debug:
            self.log.setLevel(logging.DEBUG)
8 changes: 2 additions & 6 deletions analytics/consumer.py
@@ -1,14 +1,10 @@
import logging
from threading import Thread
from queue import Empty

import analytics
from analytics.version import VERSION
from analytics.request import post


class Consumer(Thread):
"""Consumes the messages from the client's queue."""
@@ -59,7 +55,7 @@ def upload(self):
            self.on_error(e, batch)
        finally:
            # mark items as acknowledged from queue
            for _ in batch:
                self.queue.task_done()
        return success

29 changes: 28 additions & 1 deletion analytics/request.py
@@ -10,6 +10,30 @@

_session = sessions.Session()

@retry(wait_exponential_multiplier=500, wait_exponential_max=5000,
       stop_max_delay=20000)
def get(write_key, endpoint):
    log = logging.getLogger('segment')
    headers = {
        'content-type': 'application/json',
        'x-api-key': write_key,
    }
    res = _session.get(endpoint, headers=headers, timeout=15)

    if res.status_code == 200:
        log.debug('get request is successful')
        return res.json()

    try:
        payload = res.json()
        log.debug('received response: %s', payload)
        raise APIError(
            res.status_code,
            payload.get('code', '???'),
            payload.get('message', '???'))
    except ValueError:
        raise APIError(res.status_code, 'unknown', res.text)
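
# A sketch of calling the new get() helper (illustrative only; how the s3
# consumer actually uses it is not shown in this diff):
#   config = get('AWS_API_GATEWAY_KEY', 'https://segment.fih.io/v1/[endpoint-key]')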


@retry(wait_exponential_multiplier=500, wait_exponential_max=5000,
       stop_max_delay=20000)
@@ -21,7 +45,10 @@ def post(write_key, endpoint, **kwargs):
    body["sentAt"] = int(time.time()*1000)
    auth = HTTPBasicAuth(write_key, '')
    data = json.dumps(body, cls=DatetimeSerializer)
    headers = {
        'content-type': 'application/json',
        'x-api-key': write_key,
    }
    log.debug('making request: %s', data)
    res = _session.post(endpoint, data=data, auth=auth, headers=headers, timeout=15)
Expand Down