SeqAn3  3.2.0
The Modern C++ library for sequence analysis.
sam_file/input.hpp
Go to the documentation of this file.
1 // -----------------------------------------------------------------------------------------------------
2 // Copyright (c) 2006-2022, Knut Reinert & Freie Universität Berlin
3 // Copyright (c) 2016-2022, Knut Reinert & MPI für molekulare Genetik
4 // This file may be used, modified and/or redistributed under the terms of the 3-clause BSD-License
5 // shipped with this file and also available at: https://github.com/seqan/seqan3/blob/master/LICENSE.md
6 // -----------------------------------------------------------------------------------------------------
7 
13 #pragma once
14 
15 #include <cassert>
16 #include <concepts>
17 #include <filesystem>
18 #include <fstream>
19 #include <ranges>
20 #include <string>
21 #include <variant>
22 #include <vector>
23 
34 #include <seqan3/io/detail/record.hpp>
35 #include <seqan3/io/exception.hpp>
47 
48 namespace seqan3
49 {
50 
51 // ---------------------------------------------------------------------------------------------------------------------
52 // sam_file_input_traits
53 // ---------------------------------------------------------------------------------------------------------------------
54 
113 template <typename t>
114 concept sam_file_input_traits =
115  requires (t v) {
116  // field::seq
121 
122  // field::id
124 
125  // field::qual
128 
129  // field::ref_seq
130  // either ref_info_not_given or a range over ranges over alphabet (e.g. std::vector<dna4_vector>)
131  requires std::same_as<typename t::ref_sequences, ref_info_not_given>
132  || requires () {
133  requires alphabet<std::ranges::range_reference_t<
134  std::ranges::range_reference_t<typename t::ref_sequences>>>;
135  };
136 
137  // field::ref_id
139  && (!std::same_as<typename t::ref_sequences, ref_info_not_given>
141  std::ranges::range_reference_t<std::ranges::range_reference_t<typename t::ref_ids>>>);
142  requires std::ranges::forward_range<std::ranges::range_reference_t<typename t::ref_ids>>;
143  requires std::ranges::forward_range<typename t::ref_ids>;
144 
145  // field::offset is fixed to int32_t
146  // field::ref_offset is fixed to std::optional<int32_t>
147  // field::flag is fixed to seqan3::sam_flag
148  // field::mapq is fixed to uint8_t
149  // field::evalue is fixed to double
150  // field::bitscore is fixed to double
151  // field::mate is fixed to std::tuple<ref_id_container<ref_id_alphabet>, ref_offset_type, int32_t>
152 
153  // field::alignment
154  // the alignment type cannot be configured.
155  // Type of tuple entry 1 (reference) is set to
156  // 1) a std::ranges::subrange over std::ranges::range_value_t<typename t::ref_sequences> if reference information was given
157  // or 2) a "dummy" sequence type:
158  // views::repeat_n(sequence_alphabet{}, size_t{}) | std::views::transform(detail::access_restrictor_fn{})
159  // Type of tuple entry 2 (query) is set to
160  // 1) a std::ranges::subrange over std::ranges::range_value_t<typename t::ref_sequences> if reference information was given
161  // or 2) a "dummy" sequence type:
162  };
164 
165 // ---------------------------------------------------------------------------------------------------------------------
166 // sam_file_input_default_traits
167 // ---------------------------------------------------------------------------------------------------------------------
168 
184 template <typename ref_sequences_t = ref_info_not_given, typename ref_ids_t = std::deque<std::string>>
186 {
194 
197 
199  template <typename _sequence_alphabet>
201 
203  template <typename _id_alphabet>
205 
208 
210  template <typename _quality_alphabet>
212 
214  using ref_sequences = ref_sequences_t;
215 
217  using ref_ids = ref_ids_t;
219 };
220 
221 // ---------------------------------------------------------------------------------------------------------------------
222 // sam_file_input
223 // ---------------------------------------------------------------------------------------------------------------------
224 
240 template <sam_file_input_traits traits_type_ = sam_file_input_default_traits<>,
241  detail::fields_specialisation selected_field_ids_ = fields<field::seq,
242  field::id,
243  field::offset,
244  field::ref_id,
245  field::ref_offset,
246  field::alignment,
247  field::cigar,
248  field::mapq,
249  field::qual,
250  field::flag,
251  field::mate,
252  field::tags,
253  field::header_ptr>,
254  detail::type_list_of_sam_file_input_formats valid_formats_ = type_list<format_sam, format_bam>>
256 {
257 public:
//!\brief A traits type that defines aliases and templates for storage of the fields.
263  using traits_type = traits_type_;
//!\brief A seqan3::fields list with the fields selected for the record.
265  using selected_field_ids = selected_field_ids_;
//!\brief A seqan3::type_list with the possible formats.
267  using valid_formats = valid_formats_;
//!\brief Character type of the stream(s).
269  using stream_char_type = char;
271 
272 private:
//!\brief Placeholder ("dummy") reference sequence type: the type of a repeat_n view over
//!       traits_type::sequence_alphabet piped through detail::access_restrictor_fn.
274  using dummy_ref_type = decltype(views::repeat_n(typename traits_type::sequence_alphabet{}, size_t{})
275  | std::views::transform(detail::access_restrictor_fn{}));
276 
//!\brief The type of a single reference sequence: the range_reference_t of
//!       `traits_type::ref_sequences const` if that is a range, otherwise dummy_ref_type.
//!       detail::lazy presumably defers instantiating range_reference_t for the non-range case.
278  using ref_sequence_unsliced_type = detail::lazy_conditional_t<
279  std::ranges::range<typename traits_type::ref_sequences const>,
280  detail::lazy<std::ranges::range_reference_t, typename traits_type::ref_sequences const>,
281  dummy_ref_type>;
282 
//!\brief The type obtained by applying views::slice to a ref_sequence_unsliced_type.
284  using ref_sequence_sliced_type = decltype(std::declval<ref_sequence_unsliced_type>() | views::slice(0, 0));
285 
286 public:
//!\brief The type of field::id: the traits' id_container instantiated with char.
295  using id_type = typename traits_type::template id_container<char>;
//!\brief The type of field::offset is fixed to int32_t.
297  using offset_type = int32_t;
305  dummy_ref_type,
306  ref_sequence_sliced_type>;
//!\brief The type of field::mapq is fixed to uint8_t.
323  using mapq_type = uint8_t;
//!\brief The type of field::qual: the traits' quality_container over the traits' quality_alphabet.
325  using quality_type = typename traits_type::template quality_container<typename traits_type::quality_alphabet>;
334 
335 private:
341 
342 public:
345 
348  id_type,
349  offset_type,
350  ref_id_type,
354  mapq_type,
355  quality_type,
356  flag_type,
357  mate_type,
359  header_type *>;
360 
383  field::id,
388  field::cigar,
389  field::mapq,
390  field::qual,
391  field::flag,
392  field::mate,
393  field::tags,
395 
// Compile-time validation of the selected fields: the immediately invoked constexpr lambda walks
// selected_field_ids::as_array and yields false as soon as one field is not contained in field_ids,
// which makes compilation fail with the message below.
396  static_assert(
397  []() constexpr {
398  for (field f : selected_field_ids::as_array)
399  if (!field_ids::contains(f))
400  return false;
401  return true;
402  }(),
403  "You selected a field that is not valid for alignment files, please refer to the documentation "
404  "of sam_file_input::field_ids for the accepted values.");
405 
407  using record_type =
410 
//!\brief The const_reference type is void because files are not const-iterable.
420  using const_reference = void;
//!\brief An unsigned integer type (size_t).
422  using size_type = size_t;
//!\brief The iterator type of this view (an input iterator).
426  using iterator = detail::in_file_iterator<sam_file_input>;
//!\brief The const iterator type is void because files are not const-iterable.
428  using const_iterator = void;
//!\brief The type returned by end().
430  using sentinel = std::default_sentinel_t;
432 
//!\brief Default constructor is explicitly deleted, you need to give a stream or file name.
437  sam_file_input() = delete;
//!\brief Copy construction is explicitly deleted because you cannot have multiple access to the same file.
439  sam_file_input(sam_file_input const &) = delete;
//!\brief Destructor is defaulted.
447  ~sam_file_input() = default;
448 
467  selected_field_ids const & SEQAN3_DOXYGEN_ONLY(fields_tag) = selected_field_ids{}) :
468  primary_stream{new std::ifstream{}, stream_deleter_default}
469  {
470  init_by_filename(std::move(filename));
471  }
472 
//!\brief Construct from an existing stream and with specified format.
//!\tparam stream_t    Stream type; its char_type must be the same as stream_char_type.
//!\tparam file_format Format to read; init_by_format() statically asserts it is in valid_formats.
//!\param stream       Stream to read from. Only its address is stored, paired with the no-op deleter,
//!                    so this object never deletes the stream.
//!\param format_tag   Tag that selects the format by its type; not referenced in the body.
//!\param fields_tag   Tag that selects the fields by its type; not referenced in the body.
492  template <input_stream stream_t, sam_file_input_format file_format>
493  requires std::same_as<typename std::remove_reference_t<stream_t>::char_type, stream_char_type>
494  sam_file_input(stream_t & stream,
495  file_format const & SEQAN3_DOXYGEN_ONLY(format_tag),
496  selected_field_ids const & SEQAN3_DOXYGEN_ONLY(fields_tag) = selected_field_ids{}) :
497  primary_stream{&stream, stream_deleter_noop}
498  {
499  init_by_format<file_format>();
500  }
501 
//!\brief Overload of the stream constructor that takes over the stream.
//!\tparam stream_t    Stream type; its char_type must be the same as stream_char_type.
//!\tparam file_format Format to read; init_by_format() statically asserts it is in valid_formats.
//!\param stream       Stream to read from. It is moved into a heap-allocated stream_t that is held by
//!                    primary_stream and released through stream_deleter_default.
//!\param format_tag   Tag that selects the format by its type; not referenced in the body.
//!\param fields_tag   Tag that selects the fields by its type; not referenced in the body.
503  template <input_stream stream_t, sam_file_input_format file_format>
504  requires std::same_as<typename std::remove_reference_t<stream_t>::char_type, stream_char_type>
505  sam_file_input(stream_t && stream,
506  file_format const & SEQAN3_DOXYGEN_ONLY(format_tag),
507  selected_field_ids const & SEQAN3_DOXYGEN_ONLY(fields_tag) = selected_field_ids{}) :
508  primary_stream{new stream_t{std::move(stream)}, stream_deleter_default}
509  {
510  init_by_format<file_format>();
511  }
512 
537  typename traits_type::ref_ids & ref_ids,
538  typename traits_type::ref_sequences & ref_sequences,
539  selected_field_ids const & SEQAN3_DOXYGEN_ONLY(fields_tag) = selected_field_ids{}) :
540  primary_stream{new std::ifstream{}, stream_deleter_default}
541  {
542  // initialize reference information
543  set_references(ref_ids, ref_sequences);
544 
545  init_by_filename(std::move(filename));
546  }
547 
//!\brief Construct from an existing stream and with specified format, given additional reference information.
//!\tparam stream_t     Stream type.
//!\tparam file_format  Format to read; init_by_format() statically asserts it is in valid_formats.
//!\param stream        Stream to read from. Only its address is stored, paired with the no-op deleter,
//!                     so this object never deletes the stream.
//!\param ref_ids       Reference identifiers, forwarded to set_references().
//!\param ref_sequences Reference sequences, forwarded to set_references(), which keeps a pointer to them.
//!\param format_tag    Tag that selects the format by its type; not referenced in the body.
//!\param fields_tag    Tag that selects the fields by its type; not referenced in the body.
// NOTE(review): unlike the stream constructors without reference information, this overload has no
// requires-clause tying stream_t::char_type to stream_char_type -- confirm whether that is intentional.
573  template <input_stream stream_t, sam_file_input_format file_format>
574  sam_file_input(stream_t & stream,
575  typename traits_type::ref_ids & ref_ids,
576  typename traits_type::ref_sequences & ref_sequences,
577  file_format const & SEQAN3_DOXYGEN_ONLY(format_tag),
578  selected_field_ids const & SEQAN3_DOXYGEN_ONLY(fields_tag) = selected_field_ids{}) :
579  primary_stream{&stream, stream_deleter_noop}
580  {
581  // initialize reference information
582  set_references(ref_ids, ref_sequences);
583 
584  init_by_format<file_format>();
585  }
586 
//!\brief Overload of the stream constructor with reference information that takes over the stream.
//!\tparam stream_t     Stream type.
//!\tparam file_format  Format to read; init_by_format() statically asserts it is in valid_formats.
//!\param stream        Stream to read from. It is moved into a heap-allocated stream_t that is held by
//!                     primary_stream and released through stream_deleter_default.
//!\param ref_ids       Reference identifiers, forwarded to set_references().
//!\param ref_sequences Reference sequences, forwarded to set_references(), which keeps a pointer to them.
//!\param format_tag    Tag that selects the format by its type; not referenced in the body.
//!\param fields_tag    Tag that selects the fields by its type; not referenced in the body.
// NOTE(review): unlike the stream constructors without reference information, this overload has no
// requires-clause tying stream_t::char_type to stream_char_type -- confirm whether that is intentional.
588  template <input_stream stream_t, sam_file_input_format file_format>
589  sam_file_input(stream_t && stream,
590  typename traits_type::ref_ids & ref_ids,
591  typename traits_type::ref_sequences & ref_sequences,
592  file_format const & SEQAN3_DOXYGEN_ONLY(format_tag),
593  selected_field_ids const & SEQAN3_DOXYGEN_ONLY(fields_tag) = selected_field_ids{}) :
594  primary_stream{new stream_t{std::move(stream)}, stream_deleter_default}
595  {
596  // initialize reference information
597  set_references(ref_ids, ref_sequences);
598 
599  init_by_format<file_format>();
600  }
601 
603  // explicitly delete rvalues for reference information
605  typename traits_type::ref_ids &&,
606  typename traits_type::ref_sequences &&,
607  selected_field_ids const &) = delete;
608 
// Deleted stream overload: reference ids and reference sequences given as rvalues are rejected.
// set_references() stores the address of the reference sequences, so a temporary must not bind here.
609  template <input_stream stream_t, sam_file_input_format file_format>
610  sam_file_input(stream_t &&,
611  typename traits_type::ref_ids &&,
612  typename traits_type::ref_sequences &&,
613  file_format const &,
614  selected_field_ids const &) = delete;
617 
639  {
640  // buffer first record
641  if (!first_record_was_read)
642  {
643  read_next_record();
644  first_record_was_read = true;
645  }
646 
647  return {*this};
648  }
649 
//!\brief Returns a sentinel for comparison with iterator.
//!\returns A value-initialised sentinel (std::default_sentinel_t); no file state is touched.
663  sentinel end() noexcept
664  {
665  return {};
666  }
667 
//!\brief Return the record we are currently at in the file.
//!\returns The result of dereferencing begin().
// NOTE(review): this function is declared noexcept, but begin() calls read_next_record() when no
// record has been buffered yet -- confirm that this path cannot throw, otherwise a parse error
// raised here would terminate the program.
691  reference front() noexcept
692  {
693  return *begin();
694  }
696 
699 
713  {
714  // make sure header is read
715  if (!first_record_was_read)
716  {
717  read_next_record();
718  first_record_was_read = true;
719  }
720 
721  return *header_ptr;
722  }
723 
724 protected:
726 
//!\brief Set up the primary and secondary stream and the format from a file name.
//!\param filename Path of the file to open for reading.
//!\throws file_open_error if the stream is not good() after the open attempt.
// Expects primary_stream to point at a std::basic_ifstream<char>; the filename constructors create it
// as `new std::ifstream{}` before calling this function.
728  void init_by_filename(std::filesystem::path filename)
729  {
// Install stream_buffer as the stream's buffer before the file is opened.
// NOTE(review): presumably done first because the effect of setbuf on an already opened
// file buffer is implementation-defined -- confirm.
730  primary_stream->rdbuf()->pubsetbuf(stream_buffer.data(), stream_buffer.size());
// Open for input in binary mode.
731  static_cast<std::basic_ifstream<char> *>(primary_stream.get())
732  ->open(filename, std::ios_base::in | std::ios::binary);
733  // fail early if the stream could not be opened
734  if (!primary_stream->good())
735  throw file_open_error{"Could not open file " + filename.string() + " for reading."};
736 
// The secondary stream is derived from the primary one and the file name; the format is
// selected from the file name as well (both by helpers in namespace detail).
737  secondary_stream = detail::make_secondary_istream(*primary_stream, filename);
738  detail::set_format(format, filename);
739  }
740 
//!\brief Set up the format and the secondary stream when the format is given explicitly.
//!\tparam format_type The format to read; must be contained in valid_formats (checked at compile time).
// Note: within this function the template parameter format_type hides the class-level alias of the
// same name (the variant type of the `format` member).
742  template <typename format_type>
743  void init_by_format()
744  {
745  static_assert(list_traits::contains<format_type, valid_formats>,
746  "You selected a format that is not in the valid_formats of this file.");
747 
// Select the alternative for the requested format in the `format` variant, then derive the
// secondary stream from the primary one (no file name is available on this path).
748  format = detail::sam_file_input_format_exposer<format_type>{};
749  secondary_stream = detail::make_secondary_istream(*primary_stream);
750  }
751 
//!\brief Owning pointer to the file's header. Starts as a default-constructed header and is replaced
//!       in set_references(); its raw pointer is written into each record's header_ptr field.
753  std::unique_ptr<header_type> header_ptr{new header_type{}};
754 
//!\brief Buffer for the current record; cleared and refilled by read_next_record().
759  record_type record_buffer;
//!\brief 1'000'000 chars of storage that init_by_filename() installs as the file stream's buffer.
761  std::vector<char> stream_buffer{std::vector<char>(1'000'000)};
//!\brief Stream position handed to the format's read_alignment_record() on every read.
763  std::streampos position_buffer{};
765 
//!\brief Deleter that does nothing; used for streams this object does not own.
773  static void stream_deleter_noop(std::basic_istream<stream_char_type> *)
774  {}
//!\brief Deleter that deletes the stream; used for streams allocated with `new` by the constructors.
//!\param ptr The stream to delete.
776  static void stream_deleter_default(std::basic_istream<stream_char_type> * ptr)
777  {
778  delete ptr;
779  }
780 
//!\brief The primary stream: either a caller-provided stream (no-op deleter) or a stream allocated
//!       by a constructor (default deleter).
782  stream_ptr_t primary_stream{nullptr, stream_deleter_noop};
//!\brief The secondary stream, created from the primary stream by detail::make_secondary_istream();
//!       records are read from this one.
784  stream_ptr_t secondary_stream{nullptr, stream_deleter_noop};
785 
//!\brief Set to true once the first read_next_record() call was issued (by begin() or header()).
787  bool first_record_was_read{false};
//!\brief Set to true by read_next_record() when no further input could be read.
789  bool at_end{false};
790 
//!\brief Variant type built from valid_formats via detail::variant_from_tags with
//!       detail::sam_file_input_format_exposer.
792  using format_type = typename detail::variant_from_tags<valid_formats, detail::sam_file_input_format_exposer>::type;
793 
//!\brief The selected format; visited in read_next_record() to dispatch the actual parsing.
795  format_type format;
797 
//!\brief Non-owning pointer to the reference sequences; nullptr until set_references() assigns it.
802  typename traits_type::ref_sequences const * reference_sequences_ptr{nullptr};
803 
//!\brief Store the reference information and fill the header from it.
//!\tparam ref_sequences_t Type of the reference sequences; must model std::ranges::forward_range.
//!\param ref_ids          The reference identifiers; a new header is constructed from them.
//!\param ref_sequences    The reference sequences; only their address is kept (reference_sequences_ptr).
// Precondition (assert only): ref_ids and ref_sequences contain the same number of elements.
// NOTE(review): ref_sequences is subscripted below although the template only requires
// forward_range, and std::ranges::distance(ref_ids) is re-evaluated in every loop iteration
// (linear for non-sized ranges) -- consider requiring random access / hoisting the count.
814  template <std::ranges::forward_range ref_sequences_t>
815  void set_references(typename traits_type::ref_ids & ref_ids, ref_sequences_t && ref_sequences)
816  {
817  assert(std::ranges::distance(ref_ids) == std::ranges::distance(ref_sequences));
818 
// Replace the default header by one constructed from the given ids and remember where the
// sequences live.
819  header_ptr = std::unique_ptr<header_type>{std::make_unique<header_type>(ref_ids)};
820  reference_sequences_ptr = &ref_sequences;
821 
822  // initialise reference map and ref_dict if ref_ids are non-empty
823  for (int32_t idx = 0; idx < std::ranges::distance(ref_ids); ++idx)
824  {
// Record the length of the idx-th reference sequence together with an empty string.
825  header_ptr->ref_id_info.emplace_back(std::ranges::distance(ref_sequences[idx]), "");
826 
// Map the reference id to its index. If the id type is a contiguous, sized, borrowed range the
// dictionary key is a std::span over the id's data; otherwise the id itself is the key.
827  if constexpr (std::ranges::contiguous_range<std::ranges::range_reference_t<typename traits_type::ref_ids>>
828  && std::ranges::sized_range<std::ranges::range_reference_t<typename traits_type::ref_ids>>
829  && std::ranges::borrowed_range<std::ranges::range_reference_t<typename traits_type::ref_ids>>)
830  {
831  auto && id = header_ptr->ref_ids()[idx];
832  header_ptr->ref_dict[std::span{std::ranges::data(id), std::ranges::size(id)}] = idx;
833  }
834  else
835  {
836  header_ptr->ref_dict[header_ptr->ref_ids()[idx]] = idx;
837  }
838  }
839  }
841 
//!\brief Read the next record from the secondary stream into record_buffer.
// Sets at_end and returns early when no further input is available; otherwise the parsing is
// delegated to the read_alignment_record() member of the currently selected format.
843  void read_next_record()
844  {
845  // clear the record
846  record_buffer.clear();
// Hand the header to the record; presumably a no-op if field::header_ptr was not selected
// (behaviour of detail::get_or_ignore -- confirm).
847  detail::get_or_ignore<field::header_ptr>(record_buffer) = header_ptr.get();
848 
849  // at end if we could not read further
850  if (std::istreambuf_iterator<stream_char_type>{*secondary_stream}
852  {
853  at_end = true;
854  return;
855  }
856 
// Generic helper so the same call can be issued with either the reference sequences or
// std::ignore as reference information (see the if constexpr at the end).
857  auto call_read_func = [this](auto & ref_seq_info)
858  {
// Dispatch on the active alternative of the format variant.
859  std::visit(
860  [&](auto & f)
861  {
862  f.read_alignment_record(*secondary_stream,
863  options,
864  ref_seq_info,
865  *header_ptr,
866  position_buffer,
867  detail::get_or_ignore<field::seq>(record_buffer),
868  detail::get_or_ignore<field::qual>(record_buffer),
869  detail::get_or_ignore<field::id>(record_buffer),
870  detail::get_or_ignore<field::offset>(record_buffer),
871  detail::get_or_ignore<field::ref_seq>(record_buffer),
872  detail::get_or_ignore<field::ref_id>(record_buffer),
873  detail::get_or_ignore<field::ref_offset>(record_buffer),
874  detail::get_or_ignore<field::alignment>(record_buffer),
875  detail::get_or_ignore<field::cigar>(record_buffer),
876  detail::get_or_ignore<field::flag>(record_buffer),
877  detail::get_or_ignore<field::mapq>(record_buffer),
878  detail::get_or_ignore<field::mate>(record_buffer),
879  detail::get_or_ignore<field::tags>(record_buffer),
880  detail::get_or_ignore<field::evalue>(record_buffer),
881  detail::get_or_ignore<field::bit_score>(record_buffer));
882  },
883  format);
884  };
885 
886  assert(!format.valueless_by_exception());
887 
// Pass the stored reference sequences if the traits carry a reference sequence type,
// otherwise pass std::ignore.
// NOTE(review): reference_sequences_ptr is only assigned in set_references() and defaults to
// nullptr -- confirm it is always set when ref_sequences is not ref_info_not_given.
888  if constexpr (!std::same_as<typename traits_type::ref_sequences, ref_info_not_given>)
889  call_read_func(*reference_sequences_ptr);
890  else
891  call_read_func(std::ignore);
892  }
893 
895  friend iterator;
896 };
897 
903 template <input_stream stream_type, sam_file_input_format file_format, detail::fields_specialisation selected_field_ids>
904 sam_file_input(stream_type && stream, file_format const &, selected_field_ids const &)
905  -> sam_file_input<typename sam_file_input<>::traits_type, // actually use the default
908 
910 template <input_stream stream_type, sam_file_input_format file_format, detail::fields_specialisation selected_field_ids>
911 sam_file_input(stream_type & stream, file_format const &, selected_field_ids const &)
912  -> sam_file_input<typename sam_file_input<>::traits_type, // actually use the default
915 
917 template <input_stream stream_type, sam_file_input_format file_format>
918 sam_file_input(stream_type && stream, file_format const &)
919  -> sam_file_input<typename sam_file_input<>::traits_type, // actually use the default
920  typename sam_file_input<>::selected_field_ids, // actually use the default
922 
924 template <input_stream stream_type, sam_file_input_format file_format>
925 sam_file_input(stream_type & stream, file_format const &)
926  -> sam_file_input<typename sam_file_input<>::traits_type, // actually use the default
927  typename sam_file_input<>::selected_field_ids, // actually use the default
929 
931 template <std::ranges::forward_range ref_ids_t,
932  std::ranges::forward_range ref_sequences_t,
933  detail::fields_specialisation selected_field_ids>
934 sam_file_input(std::filesystem::path path, ref_ids_t &, ref_sequences_t &, selected_field_ids const &)
935  -> sam_file_input<
938  typename sam_file_input<>::valid_formats>; // actually use the default
939 
941 template <std::ranges::forward_range ref_ids_t, std::ranges::forward_range ref_sequences_t>
942 sam_file_input(std::filesystem::path path, ref_ids_t &, ref_sequences_t &) -> sam_file_input<
944  typename sam_file_input<>::selected_field_ids, // actually use the default
945  typename sam_file_input<>::valid_formats>; // actually use the default
946 
948 template <input_stream stream_type,
949  std::ranges::forward_range ref_ids_t,
950  std::ranges::forward_range ref_sequences_t,
951  sam_file_input_format file_format,
952  detail::fields_specialisation selected_field_ids>
953 sam_file_input(stream_type && stream, ref_ids_t &, ref_sequences_t &, file_format const &, selected_field_ids const &)
954  -> sam_file_input<
958 
960 template <input_stream stream_type,
961  std::ranges::forward_range ref_ids_t,
962  std::ranges::forward_range ref_sequences_t,
963  sam_file_input_format file_format,
964  detail::fields_specialisation selected_field_ids>
965 sam_file_input(stream_type & stream, ref_ids_t &, ref_sequences_t &, file_format const &, selected_field_ids const &)
966  -> sam_file_input<
970 
972 template <input_stream stream_type,
973  std::ranges::forward_range ref_ids_t,
974  std::ranges::forward_range ref_sequences_t,
975  sam_file_input_format file_format>
976 sam_file_input(stream_type && stream, ref_ids_t &, ref_sequences_t &, file_format const &) -> sam_file_input<
978  typename sam_file_input<>::selected_field_ids, // actually use the default
980 
982 template <input_stream stream_type,
983  std::ranges::forward_range ref_ids_t,
984  std::ranges::forward_range ref_sequences_t,
985  sam_file_input_format file_format>
986 sam_file_input(stream_type & stream, ref_ids_t &, ref_sequences_t &, file_format const &) -> sam_file_input<
988  typename sam_file_input<>::selected_field_ids, // actually use the default
991 
992 } // namespace seqan3
Provides seqan3::aa27, container aliases and string literals.
Provides the seqan3::cigar alphabet.
Provides alphabet adaptations for standard char types.
The 15 letter DNA alphabet, containing all IUPAC symbols minus the gap.
Definition: dna15.hpp:51
The five letter DNA alphabet of A,C,G,T and the unknown character N.
Definition: dna5.hpp:51
A gap decorator allows the annotation of sequences with gap symbols while leaving the underlying sequ...
Definition: gap_decorator.hpp:81
Quality type for traditional Sanger and modern Illumina Phred scores.
Definition: phred42.hpp:47
Stores the header information of alignment files.
Definition: header.hpp:34
A class for reading alignment files, e.g. SAM, BAM, BLAST ...
Definition: sam_file/input.hpp:256
sentinel end() noexcept
Returns a sentinel for comparison with iterator.
Definition: sam_file/input.hpp:663
size_t size_type
An unsigned integer type, usually std::size_t.
Definition: sam_file/input.hpp:422
std::optional< int32_t > ref_id_type
The type of field::ref_id is fixed to std::optional<int32_t>.
Definition: sam_file/input.hpp:314
void const_reference
The const_reference type is void because files are not const-iterable.
Definition: sam_file/input.hpp:420
valid_formats_ valid_formats
A seqan3::type_list with the possible formats.
Definition: sam_file/input.hpp:267
sam_file_input(std::filesystem::path path, ref_ids_t &, ref_sequences_t &) -> sam_file_input< sam_file_input_default_traits< std::remove_reference_t< ref_sequences_t >, std::remove_reference_t< ref_ids_t >>, typename sam_file_input<>::selected_field_ids, typename sam_file_input<>::valid_formats >
Deduce ref_sequences_t and ref_ids_t, default the rest.
sam_file_input(stream_type &&stream, ref_ids_t &, ref_sequences_t &, file_format const &, selected_field_ids const &) -> sam_file_input< sam_file_input_default_traits< std::remove_reference_t< ref_sequences_t >, std::remove_reference_t< ref_ids_t >>, selected_field_ids, type_list< file_format >>
Deduce selected fields, ref_sequences_t and ref_ids_t, and file format.
char stream_char_type
Character type of the stream(s).
Definition: sam_file/input.hpp:269
detail::in_file_iterator< sam_file_input > iterator
The iterator type of this view (an input iterator).
Definition: sam_file/input.hpp:426
requires std::same_as< typename std::remove_reference_t< stream_t >::char_type, stream_char_type > sam_file_input(stream_t &&stream, file_format const &format_tag, selected_field_ids const &fields_tag=selected_field_ids{})
This is an overloaded member function, provided for convenience. It differs from the above function o...
Definition: sam_file/input.hpp:505
sam_file_input(std::filesystem::path filename, selected_field_ids const &fields_tag=selected_field_ids{})
Construct from filename.
Definition: sam_file/input.hpp:466
sam_file_input & operator=(sam_file_input &&)=default
Move assignment is defaulted.
sam_file_input(stream_type &&stream, file_format const &) -> sam_file_input< typename sam_file_input<>::traits_type, typename sam_file_input<>::selected_field_ids, type_list< file_format >>
Deduce file_format, and default the rest.
std::default_sentinel_t sentinel
The type returned by end().
Definition: sam_file/input.hpp:430
sam_file_input(stream_t &stream, typename traits_type::ref_ids &ref_ids, typename traits_type::ref_sequences &ref_sequences, file_format const &format_tag, selected_field_ids const &fields_tag=selected_field_ids{})
Construct from an existing stream and with specified format.
Definition: sam_file/input.hpp:574
typename traits_type::template sequence_container< typename traits_type::sequence_alphabet > sequence_type
The type of field::seq (default std::vector<seqan3::dna5>).
Definition: sam_file/input.hpp:293
sam_file_input(stream_type &stream, ref_ids_t &, ref_sequences_t &, file_format const &, selected_field_ids const &) -> sam_file_input< sam_file_input_default_traits< std::remove_reference_t< ref_sequences_t >, std::remove_reference_t< ref_ids_t >>, selected_field_ids, type_list< file_format >>
Deduce selected fields, ref_sequences_t and ref_ids_t, and file format.
sam_file_input(stream_type &stream, ref_ids_t &, ref_sequences_t &, file_format const &) -> sam_file_input< sam_file_input_default_traits< std::remove_reference_t< ref_sequences_t >, std::remove_reference_t< ref_ids_t >>, typename sam_file_input<>::selected_field_ids, type_list< file_format >>
Deduce ref_sequences_t and ref_ids_t, and file format.
std::optional< int32_t > ref_offset_type
The type of field::ref_offset is fixed to a std::optional<int32_t>.
Definition: sam_file/input.hpp:321
traits_type_ traits_type
A traits type that defines aliases and template for storage of the fields.
Definition: sam_file/input.hpp:263
int32_t offset_type
The type of field::offset is fixed to int32_t.
Definition: sam_file/input.hpp:297
sam_file_input_options< typename traits_type::sequence_legal_alphabet > options
The options are public and its members can be set directly.
Definition: sam_file/input.hpp:698
sam_file_header< typename traits_type::ref_ids > header_type
The type of field::header_ptr (default: sam_file_header<typename traits_type::ref_ids>).
Definition: sam_file/input.hpp:333
header_type & header()
Access the file's header.
Definition: sam_file/input.hpp:712
sam_file_input(std::filesystem::path path, ref_ids_t &, ref_sequences_t &, selected_field_ids const &) -> sam_file_input< sam_file_input_default_traits< std::remove_reference_t< ref_sequences_t >, std::remove_reference_t< ref_ids_t >>, selected_field_ids, typename sam_file_input<>::valid_formats >
Deduce selected fields, ref_sequences_t and ref_ids_t, default the rest.
typename traits_type::template id_container< char > id_type
The type of field::id (std::string by default).
Definition: sam_file/input.hpp:295
sam_file_input(stream_t &&stream, typename traits_type::ref_ids &ref_ids, typename traits_type::ref_sequences &ref_sequences, file_format const &format_tag, selected_field_ids const &fields_tag=selected_field_ids{})
This is an overloaded member function, provided for convenience. It differs from the above function o...
Definition: sam_file/input.hpp:589
std::tuple< gap_decorator< ref_sequence_type >, alignment_query_type > alignment_type
The type of field::alignment (default: std::pair<std::vector<gapped<dna5>>, std::vector<gapped<dna5>>...
Definition: sam_file/input.hpp:344
sam_record< detail::select_types_with_ids_t< field_types, field_ids, selected_field_ids >, selected_field_ids > record_type
The type of the record, a specialisation of seqan3::record; acts as a tuple of the selected field typ...
Definition: sam_file/input.hpp:408
sam_file_input()=delete
Default constructor is explicitly deleted, you need to give a stream or file name.
iterator begin()
Returns an iterator to current position in the file.
Definition: sam_file/input.hpp:638
selected_field_ids_ selected_field_ids
A seqan3::fields list with the fields selected for the record.
Definition: sam_file/input.hpp:265
sam_file_input(std::filesystem::path filename, typename traits_type::ref_ids &ref_ids, typename traits_type::ref_sequences &ref_sequences, selected_field_ids const &fields_tag=selected_field_ids{})
Construct from filename and given additional reference information.
Definition: sam_file/input.hpp:536
requires std::same_as< typename std::remove_reference_t< stream_t >::char_type, stream_char_type > sam_file_input(stream_t &stream, file_format const &format_tag, selected_field_ids const &fields_tag=selected_field_ids{})
Construct from an existing stream and with specified format.
Definition: sam_file/input.hpp:494
sam_file_input(sam_file_input &&)=default
Move construction is defaulted.
void const_iterator
The const iterator type is void because files are not const-iterable.
Definition: sam_file/input.hpp:428
sam_file_input(sam_file_input const &)=delete
Copy construction is explicitly deleted because you cannot have multiple access to the same file.
uint8_t mapq_type
The type of field::mapq is fixed to uint8_t.
Definition: sam_file/input.hpp:323
sam_flag flag_type
The type of field::flag is fixed to seqan3::sam_flag.
Definition: sam_file/input.hpp:327
sam_file_input(stream_type &stream, file_format const &, selected_field_ids const &) -> sam_file_input< typename sam_file_input<>::traits_type, selected_field_ids, type_list< file_format >>
Deduce selected fields, file_format, and default the rest.
sam_file_input(stream_type &stream, file_format const &) -> sam_file_input< typename sam_file_input<>::traits_type, typename sam_file_input<>::selected_field_ids, type_list< file_format >>
Deduce file_format, and default the rest.
sam_file_input & operator=(sam_file_input const &)=delete
Copy assignment is explicitly deleted because you cannot have multiple access to the same file.
~sam_file_input()=default
Destructor is defaulted.
sam_file_input(stream_type &&stream, ref_ids_t &, ref_sequences_t &, file_format const &) -> sam_file_input< sam_file_input_default_traits< std::remove_reference_t< ref_sequences_t >, std::remove_reference_t< ref_ids_t >>, typename sam_file_input<>::selected_field_ids, type_list< file_format >>
Deduce ref_sequences_t and ref_ids_t, and file format.
std::tuple< ref_id_type, ref_offset_type, int32_t > mate_type
The type of field::mate is fixed to std::tuple<ref_id_type, ref_offset_type, int32_t>.
Definition: sam_file/input.hpp:331
reference front() noexcept
Return the record we are currently at in the file.
Definition: sam_file/input.hpp:691
typename traits_type::template quality_container< typename traits_type::quality_alphabet > quality_type
The type of field::qual (default std::vector<seqan3::phred42>).
Definition: sam_file/input.hpp:325
sam_file_input(stream_type &&stream, file_format const &, selected_field_ids const &) -> sam_file_input< typename sam_file_input<>::traits_type, selected_field_ids, type_list< file_format >>
Deduce selected fields, file_format, and default the rest.
The SAM tag dictionary class that stores all optional SAM fields.
Definition: sam_tag_dictionary.hpp:343
The <concepts> header from C++20's standard library.
T data(T... args)
Provides seqan3::dna15, container aliases and string literals.
Provides seqan3::dna5, container aliases and string literals.
Provides the seqan3::format_bam.
Provides the seqan3::format_sam.
Provides seqan3::gap_decorator.
T get(T... args)
alphabet_variant< alphabet_t, gap > gapped
Extends a given alphabet with a gap character.
Definition: gapped.hpp:41
requires requires
The rank_type of the semi-alphabet; defined as the return type of seqan3::to_rank....
Definition: alphabet/concept.hpp:164
sam_flag
An enum flag that describes the properties of an aligned read (given as a SAM record).
Definition: sam_flag.hpp:76
field
An enumerator for the fields used in file formats.
Definition: record.hpp:63
@ flag
The alignment flag (bit information), uint16_t value.
@ ref_offset
Sequence (seqan3::field::ref_seq) relative start position (0-based), unsigned value.
@ alignment
The (pairwise) alignment stored in an object that models seqan3::detail::pairwise_alignment.
@ cigar
The cigar vector (std::vector<seqan3::cigar>) representing the alignment in SAM/BAM format.
@ mapq
The mapping quality of the seqan3::field::seq alignment, usually a Phred-scaled score.
@ offset
Sequence (seqan3::field::seq) relative start position (0-based), unsigned value.
@ mate
The mate pair information given as a std::tuple of reference name, offset and template length.
@ header_ptr
A pointer to the seqan3::sam_file_header object storing header information.
@ ref_id
The identifier of the (reference) sequence that seqan3::field::seq was aligned to.
@ id
The identifier, usually a string.
@ tags
The optional tags in the SAM format, stored in a dictionary.
@ seq
The "sequence", usually a range of nucleotides or amino acids.
@ qual
The qualities, usually in Phred score notation.
requires constexpr seqan3::detail::template_specialisation_of< list_t, seqan3::type_list > bool contains
Whether a type occurs in a type list or not.
Definition: type_list/traits.hpp:252
decltype(detail::transform< trait_t >(list_t{})) transform
Apply a transformation trait to every type in the list and return a seqan3::type_list of the results.
Definition: type_list/traits.hpp:469
constexpr size_t size
The size of a type pack.
Definition: type_pack/traits.hpp:146
constexpr auto slice
A view adaptor that returns a half-open interval on the underlying range.
Definition: slice.hpp:178
constexpr auto repeat_n
A view factory that repeats a given value n times.
Definition: repeat_n.hpp:91
Provides the seqan3::detail::in_file_iterator class template.
The generic alphabet concept that covers most data types used in ranges.
Checks whether from can be explicitly converted to to.
The generic concept for alignment file input formats.
The requirements a traits_type for seqan3::sam_file_input must meet.
A more refined container concept than seqan3::container.
Refines seqan3::alphabet and adds assignability.
A concept that indicates whether a writable alphabet represents quality scores.
Provides exceptions used in the I/O module.
Stream concepts.
Provides various utility functions required only for input.
The main SeqAn3 namespace.
Definition: aligned_sequence_concept.hpp:29
Provides seqan3::phred42 quality scores.
Provides quality alphabet composites.
The <ranges> header from C++20's standard library.
Provides seqan3::views::repeat_n.
Provides seqan3::sam_file_input_format and auxiliary classes.
Provides seqan3::sam_record.
Provides helper data structures for the seqan3::sam_file_output.
T size(T... args)
Provides seqan3::views::slice.
A class template that holds a choice of seqan3::field.
Definition: record.hpp:128
Thrown if there is an unspecified filesystem or stream error while opening, e.g. permission problem.
Definition: io/exception.hpp:39
The default traits for seqan3::sam_file_input.
Definition: sam_file/input.hpp:186
ref_ids_t ref_ids
The type of the reference identifiers is deduced on construction.
Definition: sam_file/input.hpp:217
ref_sequences_t ref_sequences
The type of the reference sequences is deduced on construction.
Definition: sam_file/input.hpp:214
Type that contains multiple types.
Definition: type_list.hpp:29
Provides seqan3::detail::transformation_trait_or.
Provides traits for seqan3::type_list.
Provides seqan3::tuple_like.
T visit(T... args)