Title: | AIRR Data Representation Reference Library |
---|---|
Description: | Schema definitions and read, write and validation tools for data formatted in accordance with the AIRR Data Representation schemas defined by the AIRR Community <http://docs.airr-community.org>. |
Authors: | Jason Vander Heiden [aut, cre], Susanna Marquez [aut], Scott Christley [aut], Katharina Imkeller [aut], Ulrik Stervbo [aut], AIRR Community [cph] |
Maintainer: | Jason Vander Heiden <[email protected]> |
License: | CC BY 4.0 |
Version: | 1.5.0 |
Built: | 2024-11-21 03:50:03 UTC |
Source: | https://github.com/cran/airr |
Example data files compliant with the AIRR Data Representation standards.
extdata/rearrangement-example.tsv.gz
: Rearrangement TSV file.
extdata/repertoire-example.yaml
: Repertoire YAML file.
extdata/germline-example.json
: GermlineSet and GenotypeSet JSON file.
# Load Rearrangement example file <- system.file("extdata", "rearrangement-example.tsv.gz", package="airr") rearrangement <- read_rearrangement(file) # Load Repertoire example file <- system.file("extdata", "repertoire-example.yaml", package="airr") repertoire <- read_airr(file) # Load GermlineSet and GenotypeSet examples file <- system.file("extdata", "germline-example.json", package="airr") germline <- read_airr(file)
# Load Rearrangement example file <- system.file("extdata", "rearrangement-example.tsv.gz", package="airr") rearrangement <- read_rearrangement(file) # Load Repertoire example file <- system.file("extdata", "repertoire-example.yaml", package="airr") repertoire <- read_airr(file) # Load GermlineSet and GenotypeSet examples file <- system.file("extdata", "germline-example.json", package="airr") germline <- read_airr(file)
load_schema
loads an AIRR object definition from the internal
definition set.
load_schema(definition)
load_schema(definition)
definition |
name of the schema definition. |
Valid definitions include:
"Rearrangement"
"Alignment"
"Repertoire"
"Study"
"Subject"
"Diagnosis"
"Sample"
"SampleProcessing"
"DataProcessing"
"GermlineSet"
"GenotypeSet"
A Schema object for the definition
.
See Schema for the return object.
# Load the Rearrangement definition schema <- load_schema("Rearrangement") # Load the Repertoire definition schema <- load_schema("Repertoire")
# Load the Rearrangement definition schema <- load_schema("Rearrangement") # Load the Repertoire definition schema <- load_schema("Repertoire")
read_airr
loads a YAML or JSON file containing AIRR Data Model records.
read_airr( file, format = c("auto", "yaml", "json"), validate = TRUE, model = TRUE )
read_airr( file, format = c("auto", "yaml", "json"), validate = TRUE, model = TRUE )
file |
path to the input file. |
format |
format of the input file. Must be one of "auto", "yaml", or "json". |
validate |
run schema validation if TRUE. |
model |
if |
A named nested list
contained in the AIRR Data Model with the top-level
names reflecting the individual AIRR objects.
See Schema for the AIRR schema definition objects. See write_airr for writing AIRR Data Model records in YAML or JSON format.
# Get path to the Repertoire and GermlineSet example files f1 <- system.file("extdata", "repertoire-example.yaml", package="airr") f2 <- system.file("extdata", "germline-example.json", package="airr") # Load data files repertoire <- read_airr(f1) germline <- read_airr(f2)
# Get path to the Repertoire and GermlineSet example files f1 <- system.file("extdata", "repertoire-example.yaml", package="airr") f2 <- system.file("extdata", "germline-example.json", package="airr") # Load data files repertoire <- read_airr(f1) germline <- read_airr(f2)
read_tabular
reads a tab-delimited (TSV) file containing tabular AIRR records.
read_tabular(file, schema, base = c("1", "0"), aux_types = NULL, ...) read_rearrangement(file, base = c("1", "0"), ...) read_alignment(file, base = c("1", "0"), ...)
read_tabular(file, schema, base = c("1", "0"), aux_types = NULL, ...) read_rearrangement(file, base = c("1", "0"), ...) read_alignment(file, base = c("1", "0"), ...)
file |
input file path. |
schema |
|
base |
starting index for positional fields in the input file.
If |
aux_types |
named vector or list giving the type for fields that are not
defined in |
... |
additional arguments to pass to read_delim. |
read_rearrangement
reads an AIRR TSV containing Rearrangement data.
read_alignment
reads an AIRR TSV containing Alignment data.
A data.frame
of the TSV file with appropriate type and position conversion
for fields defined in the specification.
See Schema for the AIRR schema object definition. See write_tabular for writing AIRR data.
# Get path to the rearrangement-example file file <- system.file("extdata", "rearrangement-example.tsv.gz", package="airr") # Load data file df <- read_rearrangement(file)
# Get path to the rearrangement-example file file <- system.file("extdata", "rearrangement-example.tsv.gz", package="airr") # Load data file df <- read_rearrangement(file)
Schema
defines a common data structure for AIRR Data Representation
standards.
## S4 method for signature 'Schema' names(x) ## S4 method for signature 'Schema,character' x[i] ## S4 method for signature 'Schema' x$name InfoSchema DataFileSchema AlignmentSchema RearrangementSchema RepertoireSchema GermlineSetSchema GenotypeSetSchema AIRRSchema
## S4 method for signature 'Schema' names(x) ## S4 method for signature 'Schema,character' x[i] ## S4 method for signature 'Schema' x$name InfoSchema DataFileSchema AlignmentSchema RearrangementSchema RepertoireSchema GermlineSetSchema GenotypeSetSchema AIRRSchema
x |
|
i |
field name. |
name |
field name. |
A Schema
object.
An object of class Schema
of length 1.
An object of class Schema
of length 1.
An object of class Schema
of length 1.
An object of class Schema
of length 1.
An object of class Schema
of length 1.
An object of class Schema
of length 1.
An object of class Schema
of length 1.
An object of class list
of length 26.
The following predefined Schema objects are defined:
InfoSchema
: AIRR Info Schema
.
DataFileSchema
: AIRR DataFile Schema
.
AlignmentSchema
: AIRR Alignment Schema
.
RearrangementSchema
: AIRR Rearrangement Schema
.
RepertoireSchema
: AIRR Repertoire Schema
.
GermlineSetSchema
: AIRR GermlineSet Schema
.
GenotypeSetSchema
: AIRR GenotypeSet Schema
.
AIRRSchema
: named list containing all non-experimental
AIRR Schema
objects.
definition
name of the schema definition.
required
character
vector of required fields.
optional
character
vector of non-required fields.
properties
list
of field definitions.
info
list
schema information.
See load_schema for loading a Schema
from the definition set.
validate_airr
validates the fields in a named nested list representation of the
AIRR Data Model. Such lists are typically generated by reading JSON or YAML formatted AIRR files.
validate_airr(data, model = TRUE, each = FALSE)
validate_airr(data, model = TRUE, each = FALSE)
data |
|
model |
if |
each |
if |
Returns TRUE
if the input data
is compliant with AIRR standards and
FALSE
if not. If each=TRUE
is set, then a vector with results for
each object in data
is returned instead.
See Schema for the AIRR schema definitions. See read_airr for loading AIRR Data Models from a file. See write_airr for writing AIRR Data Models to a file.
# Get paths to the Repertoire and GermlineSet example files f1 <- system.file("extdata", "repertoire-example.yaml", package="airr") f2 <- system.file("extdata", "germline-example.json", package="airr") # Load data files repertoire <- read_airr(f1) germline <- read_airr(f2) # Validate a single record validate_airr(repertoire) # Return validation for individual objects validate_airr(germline, each=TRUE)
# Get paths to the Repertoire and GermlineSet example files f1 <- system.file("extdata", "repertoire-example.yaml", package="airr") f2 <- system.file("extdata", "germline-example.json", package="airr") # Load data files repertoire <- read_airr(f1) germline <- read_airr(f2) # Validate a single record validate_airr(repertoire) # Return validation for individual objects validate_airr(germline, each=TRUE)
validate_tabular
validates compliance of the contents of a data.frame
to the AIRR standards.
validate_tabular(data, schema) validate_rearrangement(data)
validate_tabular(data, schema) validate_rearrangement(data)
data |
|
schema |
|
validate_rearrangement
validates the standards compliance of AIRR Rearrangement
data stored in a data.frame.
Returns TRUE
if the input data
is compliant and
FALSE
if not.
# Get path to the rearrangement-example file file <- system.file("extdata", "rearrangement-example.tsv.gz", package="airr") # Load data file df <- read_rearrangement(file) # Validate a data.frame against the Rearrangement schema validate_rearrangement(df)
# Get path to the rearrangement-example file file <- system.file("extdata", "rearrangement-example.tsv.gz", package="airr") # Load data file df <- read_rearrangement(file) # Validate a data.frame against the Rearrangement schema validate_rearrangement(df)
write_airr
writes a YAML or JSON file containing AIRR Data Model records.
write_airr( data, file, format = c("auto", "yaml", "json"), validate = TRUE, model = TRUE )
write_airr( data, file, format = c("auto", "yaml", "json"), validate = TRUE, model = TRUE )
data |
|
file |
output file name. |
format |
format of the output file. Must be one of "auto", "yaml", or "json". |
validate |
run schema validation prior to write if TRUE. |
model |
if |
See Schema for the AIRR schema definition objects. See read_airr for reading AIRR Data Model files.
# Get path to the repertoire-example file file <- system.file("extdata", "repertoire-example.yaml", package="airr") # Load data file repertoire <- read_airr(file) # Write a Repertoire data file outfile <- file.path(tempdir(), "output.yaml") write_airr(repertoire, outfile)
# Get path to the repertoire-example file file <- system.file("extdata", "repertoire-example.yaml", package="airr") # Load data file repertoire <- read_airr(file) # Write a Repertoire data file outfile <- file.path(tempdir(), "output.yaml") write_airr(repertoire, outfile)
write_tabular
writes a TSV containing AIRR tabular records.
write_tabular(data, file, schema, base = c("1", "0"), ...) write_rearrangement(data, file, base = c("1", "0"), ...) write_alignment(data, file, base = c("1", "0"), ...)
write_tabular(data, file, schema, base = c("1", "0"), ...) write_rearrangement(data, file, base = c("1", "0"), ...) write_alignment(data, file, base = c("1", "0"), ...)
data |
|
file |
output file name. |
schema |
|
base |
starting index for positional fields in the output file.
Fields in the input |
... |
additional arguments to pass to write_delim. |
write_rearrangement
writes a data.frame
containing AIRR Rearrangement data to TSV.
write_alignment
writes a data.frame
containing AIRR Alignment data to TSV.
See Schema for the AIRR schema object definition. See read_tabular for reading AIRR files.
# Get path to the rearrangement-example file file <- system.file("extdata", "rearrangement-example.tsv.gz", package="airr") # Load data file df <- read_rearrangement(file) # Write a Rearrangement data file outfile <- file.path(tempdir(), "output.tsv") write_tabular(df, outfile, schema=RearrangementSchema)
# Get path to the rearrangement-example file file <- system.file("extdata", "rearrangement-example.tsv.gz", package="airr") # Load data file df <- read_rearrangement(file) # Write a Rearrangement data file outfile <- file.path(tempdir(), "output.tsv") write_tabular(df, outfile, schema=RearrangementSchema)