oracle aide

August 21, 2015

Dynamo DB Local : a missing tutorial for Python

Filed under: Uncategorized — oracleaide @ 7:43 pm

Dynamo DB Local is an excellent learning and testing tool.
It has a JavaScript shell with a useful but verbose tutorial:
DynamoDBLocal: Downloading And Running

DynamoDBLocal: Javascript Shell

Here is the same tutorial, translated to Python 2.7.

0. Install Python 2.7 and boto.

1. Launch the local dynamo db:
java -Djava.library.path=d:/app/dynamodb/DynamoDBLocal_lib -jar DynamoDBLocal.jar -sharedDb

2. Launch the console to see the tables and changes
http://localhost:8000/shell/

3. Launch python

4. Import boto
from boto.dynamodb2.layer1 import DynamoDBConnection
from boto.dynamodb2.fields import HashKey
from boto.dynamodb2.table import Table

5. Connect to the local dynamo
conn = DynamoDBConnection(aws_access_key_id='foo', aws_secret_access_key='bar', host='localhost', port=8000, is_secure=False)
>>> conn
DynamoDBConnection:localhost

6. List the tables
>>> conn.list_tables()
{u'TableNames': [u'Games', u'Image', u'ImageTag', u'x01', u'z02', u'z_local']}
>>>

7. Create a table and describe it
z = Table.create('z03', schema=[HashKey('xid')], connection = conn);
z.describe()

8. Drop tables (aka DELETE!!!)

>>> x=Table('z03',connection=conn)
>>> x.describe()
{u'Table': {u'TableArn': u'arn:aws:dynamodb:ddblocal:00000000000
rovisionedThroughput': {u'NumberOfDecreasesToday': 0, u'WriteCap
0.0}, u'TableSizeBytes': 0, u'TableName': u'z03', u'TableStatus'
'CreationDateTime': 1439230827.371}}
>>> x.delete()
True
>>>

9. Delete all tables
tt = conn.list_tables()
for t in tt['TableNames']:
    x = Table(t,connection=conn);
    #x.describe()
    x.delete()

conn.list_tables()
{u'TableNames': []}

9. Create image table with global secondary index

from boto.dynamodb2.fields import HashKey, RangeKey, KeysOnlyIndex, GlobalAllIndex
from boto.dynamodb2.table import Table
from boto.dynamodb2.types import NUMBER

image = Table.create(
'image',
schema=[HashKey('id'),],
throughput={'read': 1,'write': 1},
global_indexes=[
GlobalAllIndex(
'imageIndex', parts=[HashKey('id'),],
throughput={'read': 1,'write': 1,})
],
connection = conn
);

>>> image.describe()
{
u'Table': {
u'TableArn': u'arn:aws:dynamodb:ddblocal:000000000000:table/image',
u'AttributeDefinitions': [{
u'AttributeName': u'id',
u'AttributeType': u'S'
}],
u'GlobalSecondaryIndexes': [{
u'IndexSizeBytes': 0,
u'IndexName': u'imageIndex',
u'Projection': {
u'ProjectionType': u'ALL'
},
u'ProvisionedThroughput': {
u'WriteCapacityUnits': 1,
u'ReadCapacityUnits': 1
},
u'IndexStatus': u'ACTIVE',
u'KeySchema': [{
u'KeyType': u'HASH',
u'AttributeName': u'id'
}],
u'IndexArn': u'arn:aws:dynamodb:ddblocal:000000000000:table/image/index/imageIndex',
u'ItemCount': 0
}],
u'ProvisionedThroughput': {
u'NumberOfDecreasesToday': 0,
u'WriteCapacityUnits': 1,
u'LastIncreaseDateTime': 0.0,
u'ReadCapacityUnits': 1,
u'LastDecreaseDateTime': 0.0
},
u'TableSizeBytes': 0,
u'TableName': u'image',
u'TableStatus': u'ACTIVE',
u'KeySchema': [{
u'KeyType': u'HASH',
u'AttributeName': u'id'
}],
u'ItemCount': 0,
u'CreationDateTime': 1439232281.427
}
}

10. Create table without an index
image.delete()
image = Table.create(
'image',
schema=[HashKey('id'),],
throughput={'read': 1,'write': 1},
global_indexes=[],
connection = conn
);

>>> image.describe()
{
u'Table': {
u'TableArn': u'arn:aws:dynamodb:ddblocal:000000000000:table/image',
u'AttributeDefinitions': [{
u'AttributeName': u'id',
u'AttributeType': u'S'
}],
u'ProvisionedThroughput': {
u'NumberOfDecreasesToday': 0,
u'WriteCapacityUnits': 1,
u'LastIncreaseDateTime': 0.0,
u'ReadCapacityUnits': 1,
u'LastDecreaseDateTime': 0.0
},
u'TableSizeBytes': 0,
u'TableName': u'image',
u'TableStatus': u'ACTIVE',
u'KeySchema': [{
u'KeyType': u'HASH',
u'AttributeName': u'id'
}],
u'ItemCount': 0,
u'CreationDateTime': 1439232459.338
}
}
>>>

# the difference - no global secondary index

11. Insert an item into the table

import time
#print current time
time.time()

image.put_item(data = {'id':'dynamodb.png', 'dateAdded':time.time(), 'voteCount':0});
True

12. Get an item

>>> x = image.get_item(id='dynamodb.png')
>>> x

>>> x['id']
u'dynamodb.png'
>>> x['dateAdded']
Decimal('1439233446.11100006103515625')
>>> x['voteCount']
Decimal('0')

# convert timestamp to date
>>> from datetime import date
>>> date.today()
datetime.date(2015, 8, 10)
>>> date.fromtimestamp(x['dateAdded'])
datetime.date(2015, 8, 10)
>>>

13. Batch write items (max batch size is 25)

urls = [ 'android.png', 'appstream.png', 'cli.png', 'cloudformation.png',
'cloudfront.png', 'cloudsearch.png', 'cloudtrail.png', 'cloudwatch.png', 'data-pipeline.png',
'direct-connect.png', 'dotnet.png', 'dynamodb.png', 'ec2.png', 'eclipse.png', 'elasticache.png',
'elastic-beanstalk.png', 'elb.png', 'emr.png', 'glacier.png', 'iam.png', 'ios.png', 'java.png',
'nodejs.png', 'opsworks.png', 'php.png', 'powershell.png', 'python.png', 'rds.png', 'redshift.png',
'route53.png', 'ruby.png', 's3.png', 'ses.png', 'sns.png', 'storage-gateway.png', 'swf.png',
'transcoding.png', 'visual-studio.png', 'vpc.png'
];

len(urls)
>>> 39

for x in urls:
    with image.batch_write() as batch:
        batch.put_item(data = {'id':x, 'dateAdded':time.time(), 'voteCount':0});
>>>
>>> x= image.get_item(id='android.png')
>>> x

>>> x['id']
u'android.png'
>>>

14. Scan the whole table

tt = image.scan()
for t in tt:
    print t['id']

>>> tt=image.scan()
>>> nn=list(tt)
>>> len(nn)
39
>>>

15. Scan the table limit / no limit

#without limit
tt = image.scan(max_page_size=3)
for t in tt:
print t['id']
#returns all

# with limit

tt = image.scan(limit=6, max_page_size=3)
for t in tt:
print t['id']
# returns 6
python.png
cloudformation.png
ec2.png
cloudsearch.png
nodejs.png
elasticache.png
>>>

16. Create a table with 2 indexes

from boto.dynamodb2.fields import HashKey, RangeKey, KeysOnlyIndex, GlobalAllIndex
from boto.dynamodb2.table import Table
from boto.dynamodb2.types import NUMBER
from boto.dynamodb2.fields import GlobalKeysOnlyIndex
from boto.dynamodb2.fields import AllIndex
it = Table.create('image_tag',
schema=[HashKey('tag'),RangeKey('image_id'),],
throughput={'read': 1,'write': 1,},
global_indexes=[
GlobalKeysOnlyIndex('image_id_index',
parts=[HashKey('image_id'), RangeKey('tag'),],
throughput={'read': 1,'write': 1,}
)
],
indexes=[
AllIndex('vote_count_index',
parts=[HashKey('tag'), RangeKey('vote_count', data_type=NUMBER),],
)
],
connection = conn);

>>> it.describe()
{
u'Table': {
u'TableArn': u'arn:aws:dynamodb:ddblocal:000000000000:table/image_tag',
u'LocalSecondaryIndexes': [{
u'IndexSizeBytes': 0,
u'IndexName': u'vote_count_index',
u'Projection': {
u'ProjectionType': u'ALL'
},
u'IndexArn': u'arn:aws:dynamodb:ddblocal:000000000000:table/image_tag/index/vote_count_index',
u'KeySchema': [{
u'KeyType': u'HASH',
u'AttributeName': u'tag'
},
{
u'KeyType': u'RANGE',
u'AttributeName': u'vote_count'
}],
u'ItemCount': 0
}],
u'AttributeDefinitions': [{
u'AttributeName': u'tag',
u'AttributeType': u'S'
},
{
u'AttributeName': u'image_id',
u'AttributeType': u'S'
},
{
u'AttributeName': u'vote_count',
u'AttributeType': u'N'
}],
u'GlobalSecondaryIndexes': [{
u'IndexSizeBytes': 0,
u'IndexName': u'image_id_index',
u'Projection': {
u'ProjectionType': u'KEYS_ONLY'
},
u'ProvisionedThroughput': {
u'WriteCapacityUnits': 1,
u'ReadCapacityUnits': 1
},
u'IndexStatus': u'ACTIVE',
u'KeySchema': [{
u'KeyType': u'HASH',
u'AttributeName': u'image_id'
},
{
u'KeyType': u'RANGE',
u'AttributeName': u'tag'
}],
u'IndexArn': u'arn:aws:dynamodb:ddblocal:000000000000:table/image_tag/index/image_id_index',
u'ItemCount': 0
}],
u'ProvisionedThroughput': {
u'NumberOfDecreasesToday': 0,
u'WriteCapacityUnits': 1,
u'LastIncreaseDateTime': 0.0,
u'ReadCapacityUnits': 1,
u'LastDecreaseDateTime': 0.0
},
u'TableSizeBytes': 0,
u'TableName': u'image_tag',
u'TableStatus': u'ACTIVE',
u'KeySchema': [{
u'KeyType': u'HASH',
u'AttributeName': u'tag'
},
{
u'KeyType': u'RANGE',
u'AttributeName': u'image_id'
}],
u'ItemCount': 0,
u'CreationDateTime': 1439318478.098
}
}

17. Load data into the image_tag table

# This short program loads a bunch of example data into the image_tag table.
# A dictionary of image id to the tags to attach to the image.
images = {
'android.png': ['SDKs & Tools', 'Android'],
'appstream.png': ['Application Services', 'Amazon AppStream'],
'cli.png': ['SDKs & Tools', 'AWS CLI'],
'cloudformation.png': ['Deployment & Management', 'AWS CloudFormation'],
'cloudfront.png': ['Storage & CDN', 'Amazon CloudFront'],
'cloudsearch.png': ['Application Services', 'Amazon CloudSearch'],
'cloudtrail.png': ['Deployment & Management', 'AWS CloudTrail'],
'cloudwatch.png': ['Deployment & Management', 'Amazon CloudWatch'],
'data-pipeline.png': ['Analytics', 'AWS Data Pipeline'],
'direct-connect.png': ['Compute & Networking', 'AWS Direct Connect'],
'dotnet.png': ['SDKs & Tools', '.NET'],
'dynamodb.png': ['Database', 'Amazon DynamoDB'],
'ec2.png': ['Compute & Networking', 'Amazon EC2'],
'eclipse.png': ['SDKs & Tools', 'Eclipse'],
'elasticache.png': ['Database', 'Amazon ElastiCache'],
'elastic-beanstalk.png': ['Deployment & Management', 'AWS Elastic Beanstalk'],
'elb.png': ['Compute & Networking', 'Elastic Load Balancing'],
'emr.png': ['Analytics', 'Amazon EMR'],
'glacier.png': ['Storage & CDN', 'Amazon Glacier'],
'iam.png': ['Deployment & Management', 'AWS IAM'],
'ios.png': ['SDKs & Tools', 'iOS'],
'java.png': ['SDKs & Tools', 'Java'],
'kinesis.png': ['Analytics', 'Amazon Kinesis'],
'nodejs.png': ['SDKs & Tools', 'Node.js'],
'opsworks.png': ['Deployment & Management', 'AWS OpsWorks'],
'php.png': ['SDKs & Tools', 'PHP'],
'powershell.png': ['SDKs & Tools', 'PowerShell'],
'python.png': ['SDKs & Tools', 'Python'],
'rds.png': ['Database', 'Amazon RDS'],
'redshift.png': ['Database', 'Amazon Redshift'],
'route53.png': ['Compute & Networking', 'Amazon Route 53'],
'ruby.png': ['SDKs & Tools', 'Ruby'],
's3.png': ['Storage & CDN', 'Amazon S3'],
'ses.png': ['Application Services', 'Amazon SES'],
'sns.png': ['Application Services', 'Amazon SNS'],
'sqs.png': ['Application Services', 'Amazon SQS'],
'storage-gateway.png': ['Storage & CDN', 'Amazon Storage Gateway'],
'swf.png': ['Application Services', 'Amazon SWF'],
'transcoding.png': ['Application Services', 'Amazon Elastic Transcoder'],
'visual-studio.png': ['SDKs & Tools', 'Visual Studio'],
'vpc.png': ['Compute & Networking', 'Amazon VPC']
};

len(images)
>>> 41

i = 0
for x in images:
    print x, str(images[x]), i
    for s in images[x]:
        print s
        i+=1
        with it.batch_write() as batch:
            batch.put_item(data = {'image_id':x, 'tag':s, 'vote_count':i});

18. Scan the table

>>> rr = it.scan()
>>> for r in rr:
... print r['image_id'],r['tag'],r['vote_count']
...
vpc.png Amazon VPC 22
rds.png Amazon RDS 6
cli.png AWS CLI 48
ios.png iOS 60
opsworks.png AWS OpsWorks 8
data-pipeline.png AWS Data Pipeline 26
emr.png Amazon EMR 66

19. Query the table

>>> rr = it.query_2(tag__eq='Database')
>>> for r in rr:
... print r['image_id'],r['tag'],r['vote_count']
...
dynamodb.png Database 1
elasticache.png Database 73
rds.png Database 5
redshift.png Database 31
>>>

20. Scan with filter

rr = it.scan(image_id__eq='vpc.png')
for r in rr:
print r['image_id'],r['tag'],r['vote_count']

...
vpc.png Amazon VPC 22
vpc.png Compute & Networking 21

21. Query with index
This is #19 revisited: get items with tag 'Database' and vote_count over 5, using the local secondary index vote_count_index.

rr = it.query_2(index='vote_count_index', tag__eq='Database', vote_count__gt=5)
for r in rr:
print r['image_id'],r['tag'],r['vote_count']

elasticache.png Database 73
>>> rr = it.query_2(index='vote_count_index', tag__eq='Database', vote_count__gt=5)
>>> for r in rr:
... print r['image_id'],r['tag'],r['vote_count']
...
redshift.png Database 31
elasticache.png Database 73

22. Query using index, get all the tags for the image
rr = it.query_2(index='image_id_index', image_id__eq='dynamodb.png')
for r in rr:
print r['image_id'],r['tag'],r['vote_count']

...
dynamodb.png Amazon DynamoDB None
dynamodb.png Database None
>>>

23. Create the tag table

tt = Table.create('tag', schema=[HashKey('tag')], connection = conn);

>>> tt.describe()
{
u'Table': {
u'TableArn': u'arn:aws:dynamodb:ddblocal:000000000000:table/tag',
u'AttributeDefinitions': [{
u'AttributeName': u'tag',
u'AttributeType': u'S'
}],
u'ProvisionedThroughput': {
u'NumberOfDecreasesToday': 0,
u'WriteCapacityUnits': 5,
u'LastIncreaseDateTime': 0.0,
u'ReadCapacityUnits': 5,
u'LastDecreaseDateTime': 0.0
},
u'TableSizeBytes': 0,
u'TableName': u'tag',
u'TableStatus': u'ACTIVE',
u'KeySchema': [{
u'KeyType': u'HASH',
u'AttributeName': u'tag'
}],
u'ItemCount': 0,
u'CreationDateTime': 1439326646.757
}
}


24. Load the tag table data

tags = {
'SDKs & Tools': 12,
'Application Services': 7,
'Deployment & Management': 6,
'Storage & CDN': 4,
'Analytics': 3,
'Compute & Networking': 5,
'Database': 4,
'Android': 1,
'Amazon AppStream': 1,
'AWS CLI': 1,
'AWS CloudFormation': 1,
'Amazon CloudFront': 1,
'Amazon CloudSearch': 1,
'AWS CloudTrail': 1,
'AWS Data Pipeline': 1,
'AWS Direct Connect': 1,
'.NET': 1,
'Amazon DynamoDB': 1,
'Amazon EC2': 1,
'Eclipse': 1,
'Amazon ElastiCache': 1,
'AWS Elastic Beanstalk': 1,
'Elastic Load Balancing': 1,
'Amazon EMR': 1,
'Amazon Glacier': 1,
'AWS IAM': 1,
'iOS': 1,
'Java': 1,
'Amazon Kinesis': 1,
'Node.js': 1,
'AWS OpsWorks': 1,
'PHP': 1,
'PowerShell': 1,
'Python': 1,
'Amazon RDS': 1,
'Amazon Redshift': 1,
'Amazon Route 53': 1,
'Ruby': 1,
'Amazon S3': 1,
'Amazon SES': 1,
'Amazon SNS': 1,
'Amazon SQS': 1,
'Amazon Storage Gateway': 1,
'Amazon SWF': 1,
'Amazon Elastic Transcoder': 1,
'Visual Studio': 1,
'Amazon VPC': 1
}

for x in tags:
    print x, str(tags[x])
    with tt.batch_write() as batch:
        batch.put_item(data = {'tag':x, 'image_count':tags[x]});

>>> rr = tt.scan()
>>> for r in rr:
... print r['tag'],r['image_count']
...
Amazon VPC 1
Amazon RDS 1
AWS CLI 1
iOS 1
AWS OpsWorks 1
AWS Data Pipeline 1
Amazon EMR 1

Advertisements

Leave a Comment »

No comments yet.

RSS feed for comments on this post. TrackBack URI

Leave a Reply

Please log in using one of these methods to post your comment:

WordPress.com Logo

You are commenting using your WordPress.com account. Log Out / Change )

Twitter picture

You are commenting using your Twitter account. Log Out / Change )

Facebook photo

You are commenting using your Facebook account. Log Out / Change )

Google+ photo

You are commenting using your Google+ account. Log Out / Change )

Connecting to %s

Blog at WordPress.com.

%d bloggers like this: