Example Solr queries

For additional examples and explanations, check out the Apache Lucene Query Parser Syntax page.

Find everything

# "*:*" matches every indexed document; fl = "*" asks for all fields and
# rows limits the response to the first 20 matches.
result <- query(
  adc,
  list(
    q = "*:*",
    fl = "*",
    rows = "20"
  ),
  as = "data.frame"
)

Query a wildcard expression

# Find any id that starts with arctic-data.6 (the trailing * is the wildcard)
result <- query(
  adc,
  list(
    q = "id:arctic-data.6*",
    rows = "5000"
  ),
  as = "data.frame"
)

Query multiple fields

# Both conditions must match; the query terms are joined with "+"
result <- query(
  adc,
  list(
    q = "title:soil*+AND+origin:Ludwig",
    rows = "5000"
  ),
  as = "data.frame"
)

# Either condition may match; the query terms are joined with spaces
result <- query(
  adc,
  list(
    q = "title:soil* OR origin:Ludwig",
    rows = "5000"
  ),
  as = "data.frame"
)

You can use either spaces or + to separate the terms within a query. When typing queries in R, it’s often easier to read if you use spaces. However, when typing a query URL in the browser, you may want to use + to keep the URL clean. (The browser will replace spaces with %20.)

Query by formatType

Objects are queryable via their formatType which is one of DATA, METADATA, or RESOURCE.

# Resource maps uploaded by one submitter, most recently uploaded first.
# The ORCID URL is double-quoted inside the query string.
result <- query(
  adc,
  list(
    q = 'formatType:RESOURCE AND submitter:"http://orcid.org/0000-0002-2561-5840"',
    fl = "identifier,submitter,fileName",
    sort = "dateUploaded+desc",
    rows = "10"
  ),
  as = "data.frame"
)

# Metadata objects whose title contains "Toolik", most recently uploaded first
result <- query(
  adc,
  list(
    q = "formatType:METADATA AND title:*Toolik*",
    fl = "identifier,submitter,fileName",
    sort = "dateUploaded+desc",
    rows = "10"
  ),
  as = "data.frame"
)

Query pids by a specific submitter

# Everything uploaded by a single submitter (identified by ORCID URL),
# most recently uploaded first, up to 1000 rows.
result <- query(
  adc,
  list(
    q = 'submitter:"http://orcid.org/0000-0003-4703-1974"',
    fl = "identifier,submitter,fileName, size",
    sort = "dateUploaded+desc",
    rows = "1000"
  ),
  as = "data.frame"
)

Query pids with special characters

# Wrap a pid that contains special characters (here the ":" and "/" of a DOI)
# in escaped quotation marks so the whole pid is passed as one quoted value.
dataone::query(
  adc,
  list(
    q = paste0("id:", "\"", "doi:10.18739/A20R9M36V", "\""),
    # fl is a list of field names separated by commas, not a boolean
    # expression, so the fields are not joined with AND.
    fl = "dateUploaded,identifier",
    rows = 5000
  ),
  as = "data.frame"
)

Query multiple conditions within one field

# Parentheses group several conditions on the same field: the title must
# match both soil* and carbo*.
result <- query(
  adc,
  list(
    q = "title:(soil* AND carbo*)",
    rows = "5000"
  ),
  as = "data.frame"
)

Query for latest versions only

# Replace the X placeholders with a real ORCID before running.
# "(*:* NOT obsoletedBy:*)" drops every object that has an obsoletedBy value,
# which leaves only the latest versions.
result <- query(
  adc,
  list(
    q = "rightsHolder:*orcid.org/0000-000X-XXXX-XXXX* AND (*:* NOT obsoletedBy:*)",
    fl = "identifier,rightsHolder,formatId",
    start = "0",
    rows = "1500"
  ),
  as = "data.frame"
)

Use NOT in a query

Just add - before the query term you want to exclude!

# Titles that match "soil" but not "carbon"; the leading "-" negates the term
result <- query(
  adc,
  list(
    q = "title:(soil AND -carbon)",
    rows = "5000"
  ),
  as = "data.frame"
)

Query a coordinating node

# Same call shape as the other examples, but the first argument is the
# coordinating node object (cn) instead of adc.
result <- query(
  cn,
  list(
    q = "title:soil* AND origin:Ludwig",
    rows = "5000"
  ),
  as = "data.frame"
)

Query for EMLs that document a specific data pid

# A pid that contains special characters is wrapped in escaped quotation marks
query(
  adc,
  list(
    q = paste0("documents:", "\"", "urn:uuid:f551460b-ce36-4dd3-aaa6-3a6c6e338ec9", "\""),
    fl = "identifier",
    rows = "20"
  ),
  as = "data.frame"
)

# Alternative: put a wildcard where "urn:uuid:" would go, so the colons never
# appear in the value and nothing has to be quoted
query(
  adc,
  list(
    q = "documents:*f551460b-ce36-4dd3-aaa6-3a6c6e338ec9",
    fl = "identifier",
    rows = "20"
  ),
  as = "data.frame"
)

Query for files uploaded during a specific time

# Find objects uploaded between 2020-05-06 and now, most recently uploaded
# first.
#
# With the default encoding this request came back as
# "Client error: (400) Bad Request" and returned NULL; the request URL still
# contained the literal "[" and "]" of the range query.
# NOTE(review): presumably the server rejects those unencoded brackets, so
# reserved characters are percent-encoded here via encodeReserved = TRUE --
# confirm against the live service. Because "+" would then be encoded as
# well, the sort direction is separated with a space instead of "+".
query(
  adc,
  list(
    q = "dateUploaded:[2020-05-06T00:00:00Z TO NOW]",
    fl = "title,identifier,resourceMap,dateUploaded,dateModified",
    sort = "dateUploaded desc",
    rows = "200"
  ),
  as = "data.frame",
  encodeReserved = TRUE
)

Use facets

All resource maps with at least 100 data objects that are not on the Arctic Data Center:

https://cn.dataone.org/cn/v2/query/solr/?q=resourceMap:*+AND+-datasource:*ARCTIC*&rows=0&facet=true&facet.field=resourceMap&facet.mincount=100