Upgrade to Pro — share decks privately, control downloads, hide ads and more …

Elasticsearch in 15 Minutes

Sponsored · Your Podcast. Everywhere. Effortlessly. Share. Educate. Inspire. Entertain. You do you. We'll handle the rest.

Elasticsearch in 15 Minutes

Short overview of Elasticsearch features at the Prague user group meetup 27/6/2013

Avatar for Karel Minarik

Karel Minarik

June 27, 2013
Tweet

More Decks by Karel Minarik

Other Decks in Technology

Transcript

  1. Update a document... $ curl -X PUT localhost:9200/products/product/1 -d '{

    "title" : "Welcome to the Elasticsearch meetup!" }'
  2. A curl -XPUT 'http://localhost:9200/a/' -d '{ "settings"

     : { "index" : { "number_of_shards" : 3, "number_of_replicas" : 1 } } }' Index is partitioned into 3 primary shards, each is duplicated in 1 replica shard A1 A2 A3 Replicas Primaries A1' A2' A3'
  3. 1 node 2 nodes 3 nodes Demo "index.routing.allocation.exclude.name"    

     :  "Node1" "cluster.routing.allocation.exclude.name"  :  "Node3" ... http://git.io/elasticat
  4. {    "id"        :  "abc123",    "title"

     :  "A  JSON  Document",    "body"    :  "A  JSON  document  is  a  ...",    "published_on"  :  "2013/06/27  10:00:00",    "featured"          :  true,        "tags"    :  ["search",  "json"],    "author"  :  {        "first_name"  :  "Clara",        "last_name"    :  "Rice",        "email"            :  "[email protected]"    } } Documents as JSON Data structure with basic types, arrays and deep hierarchies
  5. Terms apple apple iphone Phrases "apple iphone" Proximity "apple safari"~5

    Fuzzy apple~0.8 Wildcards app* *pp* Boosting apple^10 safari Range [2011/05/01 TO 2011/05/31] [java TO json] Boolean apple AND NOT iphone +apple -iphone (apple OR iphone) AND NOT review Fields title:iphone^15 OR body:iphone published_on:[2011/05/01 TO "2011/05/27 10:00:00"] http://lucene.apache.org/java/3_1_0/queryparsersyntax.html $ curl -X GET "http://localhost:9200/_search?q=<YOUR QUERY>"
  6. curl -X GET localhost:9200/articles/_search -d '{ "query" : { "filtered"

    : { "query" : { "bool" : { "must" : { "match" : { "author.first_name" : { "query" : "claire", "fuzziness" : 0.1 } } }, "must" : { "multi_match" : { "query" : "elasticsearch", "fields" : ["title^10", "body"] } } } }, "filter": { "and" : [ { "terms" : { "tags" : ["search"] } }, { "range" : { "published_on": {"from": "2013"} } }, { "term" : { "featured" : true } } ] } } } }' JSON-based Query DSL
  7. curl -X GET localhost:9200/articles/_search -d '{ "query" : { "filtered"

    : { "query" : { "bool" : { "must" : { "match" : { "author.first_name" : { "query" : "claire", "fuzziness" : 0.1 } } }, "must" : { "multi_match" : { "query" : "elasticsearch", "fields" : ["title^10", "body"] } } } }, "filter": { "and" : [ { "terms" : { "tags" : ["search"] } }, { "range" : { "published_on": {"from": "2013"} } }, { "term" : { "featured" : true } } ] } } } }' JSON-based Query DSL
  8. curl -X GET localhost:9200/articles/_search -d '{ "query" : { "filtered"

    : { "query" : { "bool" : { "must" : { "match" : { "author.first_name" : { "query" : "claire", "fuzziness" : 0.1 } } }, "must" : { "multi_match" : { "query" : "elasticsearch", "fields" : ["title^10", "body"] } } } }, "filter": { "and" : [ { "terms" : { "tags" : ["search"] } }, { "range" : { "published_on": {"from": "2013"} } }, { "term" : { "featured" : true } } ] } } } }' JSON-based Query DSL
  9. curl -X GET localhost:9200/articles/_search -d '{ "query" : { "filtered"

    : { "query" : { "bool" : { "must" : { "match" : { "author.first_name" : { "query" : "claire", "fuzziness" : 0.1 } } }, "must" : { "multi_match" : { "query" : "elasticsearch", "fields" : ["title^10", "body"] } } } }, "filter": { "and" : [ { "terms" : { "tags" : ["search"] } }, { "range" : { "published_on": {"from": "2013"} } }, { "term" : { "featured" : true } } ] } } } }' JSON-based Query DSL
  10. curl -X GET localhost:9200/articles/_search -d '{ "query" : { "filtered"

    : { "query" : { "bool" : { "must" : { "match" : { "author.first_name" : { "query" : "claire", "fuzziness" : 0.1 } } }, "must" : { "multi_match" : { "query" : "elasticsearch", "fields" : ["title^10", "body"] } } } }, "filter": { "and" : [ { "terms" : { "tags" : ["search"] } }, { "range" : { "published_on": {"from": "2013"} } }, { "term" : { "featured" : true } } ] } } } }' JSON-based Query DSL
  11. “Find all articles with ‘search’ in their title or body,

    give matches in titles higher score” Full-text Search “Find all articles from year 2013 tagged ‘search’” Structured Search See custom_score and custom_filters_score queries Custom Scoring
  12. Fetch document field ➝ Pick configured analyzer ➝ Parse text

    into tokens ➝ Apply token filters ➝ Store into index How Search Engine Works? Result Results Query How Users See Search?
  13. Mapping curl -X PUT localhost:9200/articles/_mapping -d '{ "article" : {

    "properties" : { "title" : { "type" : "string", "analyzer" : "czech" } } } }' Configuring document properties for the search engine
  14. _analyze?pretty&format=text&text=Žluťoučký+kůň+skákal+přes+potok The _analyze API [žluťoučký:0->9:<ALPHANUM>] \n\n2: \n[kůň:10->13:<ALPHANUM>]\n\n3: \n[skákal:14->20:<ALPHANUM>]

    \n\n4: \n[přes:21->25:<ALPHANUM>]\n\n5: \n[potok:26->31:<ALPHANUM>] _analyze?pretty&format=text&text=Žluťoučký+kůň+skákal+přes+potok&analyzer=czech [žluťoučk:0->9:<ALPHANUM>]\n\n2: \n[koň:10->13:<ALPHANUM>]\n\n3: \n[skákal:14->20:<ALPHANUM>] \n\n5: \n[potok:26->31:<ALPHANUM>]\n _analyze?text=...&tokenizer=X&filters=A,B,C
  15. Slice Dice Drill Down / Roll Up Show me sales

    numbers for all products across all locations in year 2013 Show me product A sales numbers across all locations over all years Show me products sales numbers in location X over all years
  16. curl -X POST 'localhost:9200/articles/_search?search_type=count&pretty' -d '{ "facets": { "tag-cloud": {

    "terms" : { "field" : "tags" } } } }' “Tag Cloud” With the terms Facet "facets" : { "tag-cloud" : { "terms" : [ { "term" : "ruby", "count" : 3 }, { "term" : "java", "count" : 2 }, ... } ] } } Simplest “map/reduce” aggregation: document count per tag
  17. curl -X GET 'localhost:9200/scores/_search/?search_type=count&pretty' -d '{ "facets": { "scores-per-subject" :

    { "terms_stats" : { "key_field" : "subject", "value_field" : "score" } } } }' Statistics on Student Scores With the terms_stats Facet "facets" : { "scores-per-subject" : { "_type" : "terms_stats", "missing" : 0, "terms" : [ { "term" : "math", "count" : 4, "total_count" : 4, "min" : 25.0, "max" : 92.0, "total" : 267.0, "mean" : 66.75 }, ... ] } } Aggregating statistics per subject
  18. curl -X GET 'localhost:9200/demo-scores/_search/?search_type=count&pretty' -d '{ "query" : { "match" :

    { "student" : "john" } }, "facets": { "scores-per-subject" : { "terms_stats" : { "key_field" : "subject", "value_field" : "score" } } } }' Statistics on Student Scores With the terms_stats Facet "facets" : { "scores-per-subject" : { "_type" : "terms_stats", "missing" : 0, "terms" : [ { "term" : "math", "count" : 1, "total_count" : 1, "min" : 85.0, "max" : 85.0, "total" : 85.0, "mean" : 85.0 }, ... ] } } Realtime filtering with queries and filters
  19. Above & Beyond Bulk operations (For indexing and search operations)

    Percolator (“reversed search” — alerts, classification, …) Suggesters (“Did you mean …?”) Index aliases (Grouping or “renaming” of indices) Index templates (Automatic index configuration) Monitoring API (Amount of memory used, number of operations, …) …