FASTA and FASTQ are basic and ubiquitous formats for storing nucleotide and protein sequences. Common manipulations of FASTA/Q file include converting, searching, filtering, deduplication, splitting, shuffling, and sampling. Existing tools only implement some of these manipulations, and not particularly efficiently, and some are only available for certain operating systems. Furthermore, the complicated installation process of required packages and running environments can render these programs less user friendly. This paper describes a cross-platform ultrafast comprehensive toolkit for FASTA/Q processing. SeqKit provides executable binary files for all major operating systems, including Windows, Linux, and Mac OSX, and can be directly used without any dependencies or pre-configurations. SeqKit demonstrates competitive performance in execution time and memory usage compared to similar tools. The efficiency and usability of SeqKit enable researchers to rapidly accomplish common FASTA/Q file manipulations. SeqKit is open source and available on Github at https://github.com/shenwei356/seqkit.
Description
SeqKit: A Cross-Platform and Ultrafast Toolkit for FASTA/Q File Manipulation
%0 Journal Article
%1 10.1371/journal.pone.0163962
%A Shen, Wei
%A Le, Shuai
%A Li, Yan
%A Hu, Fuquan
%D 2016
%I Public Library of Science
%J PLOS ONE
%K MUSTREAD fastq open-source sequencing software
%N 10
%P 1-10
%R 10.1371/journal.pone.0163962
%T SeqKit: A Cross-Platform and Ultrafast Toolkit for FASTA/Q File Manipulation
%U https://doi.org/10.1371/journal.pone.0163962
%V 11
%X FASTA and FASTQ are basic and ubiquitous formats for storing nucleotide and protein sequences. Common manipulations of FASTA/Q file include converting, searching, filtering, deduplication, splitting, shuffling, and sampling. Existing tools only implement some of these manipulations, and not particularly efficiently, and some are only available for certain operating systems. Furthermore, the complicated installation process of required packages and running environments can render these programs less user friendly. This paper describes a cross-platform ultrafast comprehensive toolkit for FASTA/Q processing. SeqKit provides executable binary files for all major operating systems, including Windows, Linux, and Mac OSX, and can be directly used without any dependencies or pre-configurations. SeqKit demonstrates competitive performance in execution time and memory usage compared to similar tools. The efficiency and usability of SeqKit enable researchers to rapidly accomplish common FASTA/Q file manipulations. SeqKit is open source and available on Github at https://github.com/shenwei356/seqkit.
@comment{Journal article record for the SeqKit paper (PLOS ONE, volume 11, number 10, 2016). The month uses the standard three-letter macro, the page range uses a double hyphen, and case-sensitive tokens in the title are brace-protected so sentence-casing styles keep them intact. The added-at, biburl, description, interhash, intrahash, keywords and timestamp fields are export metadata that standard styles ignore.}
@article{10.1371/journal.pone.0163962,
  abstract    = {FASTA and FASTQ are basic and ubiquitous formats for storing nucleotide and protein sequences. Common manipulations of FASTA/Q file include converting, searching, filtering, deduplication, splitting, shuffling, and sampling. Existing tools only implement some of these manipulations, and not particularly efficiently, and some are only available for certain operating systems. Furthermore, the complicated installation process of required packages and running environments can render these programs less user friendly. This paper describes a cross-platform ultrafast comprehensive toolkit for FASTA/Q processing. SeqKit provides executable binary files for all major operating systems, including Windows, Linux, and Mac OSX, and can be directly used without any dependencies or pre-configurations. SeqKit demonstrates competitive performance in execution time and memory usage compared to similar tools. The efficiency and usability of SeqKit enable researchers to rapidly accomplish common FASTA/Q file manipulations. SeqKit is open source and available on Github at https://github.com/shenwei356/seqkit.},
  added-at    = {2018-05-31T08:40:11.000+0200},
  author      = {Shen, Wei and Le, Shuai and Li, Yan and Hu, Fuquan},
  biburl      = {https://www.bibsonomy.org/bibtex/242b5e2859feea2b3405998f947799ef4/marcsaric},
  description = {SeqKit: A Cross-Platform and Ultrafast Toolkit for FASTA/Q File Manipulation},
  doi         = {10.1371/journal.pone.0163962},
  interhash   = {650a38e606a50233636e56e5ccf0dfb6},
  intrahash   = {42b5e2859feea2b3405998f947799ef4},
  journal     = {PLOS ONE},
  keywords    = {MUSTREAD fastq open-source sequencing software},
  month       = oct,
  number      = {10},
  pages       = {1--10},
  publisher   = {Public Library of Science},
  timestamp   = {2018-05-31T08:40:11.000+0200},
  title       = {{SeqKit}: A Cross-Platform and Ultrafast Toolkit for {FASTA/Q} File Manipulation},
  url         = {https://doi.org/10.1371/journal.pone.0163962},
  volume      = {11},
  year        = {2016},
}