Upgrade to Pro — share decks privately, control downloads, hide ads and more …

Elasticsearch in 15 Minutes

Elasticsearch in 15 Minutes

Short overview of Elasticsearch features at the Prague user group meetup 27/6/2013

Karel Minarik

June 27, 2013
Tweet

More Decks by Karel Minarik

Other Decks in Technology

Transcript

  1. Update a document... $ curl -X PUT localhost:9200/products/product/1 -d '{

    "title" : "Welcome to the Elasticsearch meetup!" }'
  2. A curl -XPUT 'http://localhost:9200/a/' -d '{ "settings"

     :  {                "index"  :  {                        "number_of_shards"      :  3,                        "number_of_replicas"  :  1                }        } }' Index is partitioned into 3 primary shards, each is duplicated in 1 replica shard A1 A2 A3 Replicas Primaries A1' A2' A3'
  3. 1 node 2 nodes 3 nodes Demo "index.routing.allocation.exclude.name"    

     :  "Node1" "cluster.routing.allocation.exclude.name"  :  "Node3" ... http://git.io/elasticat
  4. {    "id"        :  "abc123",    "title"

     :  "A  JSON  Document",    "body"    :  "A  JSON  document  is  a  ...",    "published_on"  :  "2013/06/27  10:00:00",    "featured"          :  true,        "tags"    :  ["search",  "json"],    "author"  :  {        "first_name"  :  "Clara",        "last_name"    :  "Rice",        "email"            :  "[email protected]"    } } Documents as JSON Data structure with basic types, arrays and deep hierarchies
  5. Terms apple apple  iphone Phrases "apple  iphone" Proximity "apple  safari"~5

    Fuzzy apple~0.8 Wildcards app* *pp* Boosting apple^10 safari Range [2011/05/01 TO 2011/05/31] [java TO json] Boolean apple AND NOT iphone +apple -iphone (apple OR iphone) AND NOT review Fields title:iphone^15 OR body:iphone published_on:[2011/05/01 TO "2011/05/27 10:00:00"] http://lucene.apache.org/java/3_1_0/queryparsersyntax.html $ curl -X GET "http://localhost:9200/_search?q=<YOUR QUERY>"
  6. curl -X GET localhost:9200/articles/_search -d '{ "query" : { "filtered"

    : { "query" : { "bool" : { "must" : { "match" : { "author.first_name" : { "query" : "claire", "fuzziness" : 0.1 } } }, "must" : { "multi_match" : { "query" : "elasticsearch", "fields" : ["title^10", "body"] } } } }, "filter": { "and" : [ { "terms" : { "tags" : ["search"] } }, { "range" : { "published_on": {"from": "2013"} } }, { "term" : { "featured" : true } } ] } } } }' JSON-based Query DSL
  7. curl -X GET localhost:9200/articles/_search -d '{ "query" : { "filtered"

    : { "query" : { "bool" : { "must" : { "match" : { "author.first_name" : { "query" : "claire", "fuzziness" : 0.1 } } }, "must" : { "multi_match" : { "query" : "elasticsearch", "fields" : ["title^10", "body"] } } } }, "filter": { "and" : [ { "terms" : { "tags" : ["search"] } }, { "range" : { "published_on": {"from": "2013"} } }, { "term" : { "featured" : true } } ] } } } }' JSON-based Query DSL
  8. curl -X GET localhost:9200/articles/_search -d '{ "query" : { "filtered"

    : { "query" : { "bool" : { "must" : { "match" : { "author.first_name" : { "query" : "claire", "fuzziness" : 0.1 } } }, "must" : { "multi_match" : { "query" : "elasticsearch", "fields" : ["title^10", "body"] } } } }, "filter": { "and" : [ { "terms" : { "tags" : ["search"] } }, { "range" : { "published_on": {"from": "2013"} } }, { "term" : { "featured" : true } } ] } } } }' JSON-based Query DSL
  9. curl -X GET localhost:9200/articles/_search -d '{ "query" : { "filtered"

    : { "query" : { "bool" : { "must" : { "match" : { "author.first_name" : { "query" : "claire", "fuzziness" : 0.1 } } }, "must" : { "multi_match" : { "query" : "elasticsearch", "fields" : ["title^10", "body"] } } } }, "filter": { "and" : [ { "terms" : { "tags" : ["search"] } }, { "range" : { "published_on": {"from": "2013"} } }, { "term" : { "featured" : true } } ] } } } }' JSON-based Query DSL
  10. curl -X GET localhost:9200/articles/_search -d '{ "query" : { "filtered"

    : { "query" : { "bool" : { "must" : { "match" : { "author.first_name" : { "query" : "claire", "fuzziness" : 0.1 } } }, "must" : { "multi_match" : { "query" : "elasticsearch", "fields" : ["title^10", "body"] } } } }, "filter": { "and" : [ { "terms" : { "tags" : ["search"] } }, { "range" : { "published_on": {"from": "2013"} } }, { "term" : { "featured" : true } } ] } } } }' JSON-based Query DSL
  11. “Find all articles with ‘search’ in their title or body,

    give matches in titles higher score” Full-text Search “Find all articles from year 2013 tagged ‘search’” Structured Search See custom_score and custom_filters_score queries Custom Scoring
  12. Fetch document field ➝ Pick configured analyzer ➝ Parse text

    into tokens ➝ Apply token filters ➝ Store into index How Search Engine Works? Result Results Query How Users See Search?
  13. Mapping curl -X PUT localhost:9200/articles/_mapping -d '{ "article" : {

    "properties" : { "title" : { "type" : "string", "analyzer" : "czech" } } } }' Configuring document properties for the search engine
  14. _analyze?pretty&format=text&text=Žluťoučký+kůň+skákal+přes+potok The _analyze API [žluťoučký:0->9:<ALPHANUM>] \n\n2:  \n[kůň:10->13:<ALPHANUM>]\n\n3:   \n[skákal:14->20:<ALPHANUM>]

    \n\n4:  \n[přes:21->25:<ALPHANUM>]\n\n5:   \n[potok:26->31:<ALPHANUM>] _analyze?pretty&format=text&text=Žluťoučký+kůň+skákal+přes+potok&analyzer=czech [žluťoučk:0->9:<ALPHANUM>]\n\n2:  \n[koň:10->13:<ALPHANUM>]\n\n3:   \n[skákal:14->20:<ALPHANUM>] \n\n5:  \n[potok:26->31:<ALPHANUM>]\n _analyze?text=...&tokenizer=X&filters=A,B,C
  15. Slice Dice Drill Down / Roll Up Show me sales

    numbers for all products across all locations in year 2013 Show me product A sales numbers across all locations over all years Show me products sales numbers in location X over all years
  16. curl -X POST 'localhost:9200/articles/_search?search_type=count&pretty' -d '{ "facets": { "tag-cloud": {

    "terms" : { "field" : "tags" } } } }' “Tag Cloud” With the terms Facet "facets" : { "tag-cloud" : { "terms" : [ { "term" : "ruby", "count" : 3 }, { "term" : "java", "count" : 2 }, ... } ] } } Simplest “map/reduce” aggregation: document count per tag
  17. curl -X GET 'localhost:9200/scores/_search/?search_type=count&pretty' -d '{ "facets": { "scores-per-subject" :

    { "terms_stats" : { "key_field" : "subject", "value_field" : "score" } } } }' Statistics on Student Scores With the terms_stats Facet "facets" : { "scores-per-subject" : { "_type" : "terms_stats", "missing" : 0, "terms" : [ { "term" : "math", "count" : 4, "total_count" : 4, "min" : 25.0, "max" : 92.0, "total" : 267.0, "mean" : 66.75 }, ... ] } } Aggregating statistics per subject
  18. curl -X GET 'localhost:9200/demo-scores/_search/?search_type=count&pretty' -d '{ "query" : { "match" :

    { "student" : "john" } }, "facets": { "scores-per-subject" : { "terms_stats" : { "key_field" : "subject", "value_field" : "score" } } } }' Statistics on Student Scores With the terms_stats Facet "facets" : { "scores-per-subject" : { "_type" : "terms_stats", "missing" : 0, "terms" : [ { "term" : "math", "count" : 1, "total_count" : 1, "min" : 85.0, "max" : 85.0, "total" : 85.0, "mean" : 85.0 }, ... ] } } Realtime filtering with queries and filters
  19. Above & Beyond Bulk operations (For indexing and search operations)

    Percolator (“reversed search” — alerts, classification, …) Suggesters (“Did you mean …?”) Index aliases (Grouping or “renaming” of indices) Index templates (Automatic index configuration) Monitoring API (Amount of memory used, number of operations, …) …