MayaFlux 0.1.0
Digital-First Multimedia Processing Framework
Loading...
Searching...
No Matches
FileReader.hpp
Go to the documentation of this file.
1#pragma once
2
4
5#include "filesystem"
6#include "typeindex"
7
8namespace MayaFlux::Kakshya {
9
10class SignalSourceContainer;
11struct RegionGroup;
12}
13
14namespace MayaFlux::IO {
15
16/**
17 * @struct FileMetadata
18 * @brief Generic metadata structure for any file type.
19 *
20 * Stores both standard and type-specific metadata for files, including format,
21 * MIME type, size, timestamps, and arbitrary key-value attributes.
22 */
24 std::string format; ///< File format identifier (e.g., "wav", "mp3", "hdf5")
25 std::string mime_type; ///< MIME type if applicable (e.g., "audio/wav")
26 uint64_t file_size = 0; ///< Size in bytes
27 std::chrono::system_clock::time_point creation_time; ///< File creation time
28 std::chrono::system_clock::time_point modification_time; ///< Last modification time
29
30 /// Type-specific metadata stored as key-value pairs (e.g., sample rate, channels)
31 std::unordered_map<std::string, std::any> attributes;
32
33 /**
34 * @brief Get a typed attribute value by key.
35 * @tparam T Expected type.
36 * @param key Attribute key.
37 * @return Optional value if present and convertible.
38 */
39 template <typename T>
40 std::optional<T> get_attribute(const std::string& key) const
41 {
42 auto it = attributes.find(key);
43 if (it != attributes.end()) {
44 try {
45 return safe_any_cast<T>(it->second);
46 } catch (const std::bad_any_cast&) {
47 return std::nullopt;
48 }
49 }
50 return std::nullopt;
51 }
52};
53
54/**
55 * @enum FileReadOptions
56 * @brief Generic options for file reading behavior.
57 *
58 * Bitmask flags to control file reading, metadata extraction, streaming, and more.
59 */
enum class FileReadOptions : uint32_t {
    NONE = 0, ///< No special options
    EXTRACT_METADATA = 1 << 0, ///< Extract file metadata
    EXTRACT_REGIONS = 1 << 1, ///< Extract semantic regions (format-specific)
    LAZY_LOAD = 1 << 2, ///< Don't load all data immediately
    STREAMING = 1 << 3, ///< Enable streaming mode
    HIGH_PRECISION = 1 << 4, ///< Use highest precision available
    VERIFY_INTEGRITY = 1 << 5, ///< Verify file integrity/checksums
    DECOMPRESS = 1 << 6, ///< Decompress if compressed
    PARSE_STRUCTURE = 1 << 7, ///< Parse internal structure
    ALL = 0xFFFFFFFF ///< All options enabled
};

/**
 * @brief Combine two option sets.
 * @param a First set of flags.
 * @param b Second set of flags.
 * @return Flags that are set in either operand (bitwise OR of the underlying values).
 */
inline FileReadOptions operator|(FileReadOptions a, FileReadOptions b)
{
    return static_cast<FileReadOptions>(static_cast<uint32_t>(a) | static_cast<uint32_t>(b));
}

/**
 * @brief Intersect two option sets.
 * @param a First set of flags.
 * @param b Second set of flags.
 * @return Flags that are set in both operands (bitwise AND of the underlying values);
 *         equals FileReadOptions::NONE when the operands share no flag.
 */
inline FileReadOptions operator&(FileReadOptions a, FileReadOptions b)
{
    return static_cast<FileReadOptions>(static_cast<uint32_t>(a) & static_cast<uint32_t>(b));
}
82
83/**
84 * @struct FileRegion
85 * @brief Generic region descriptor for any file type.
86 *
87 * Describes a logical region or segment within a file, such as a cue, marker,
88 * chapter, scene, or data block. Used for both audio/video and scientific data.
89 */
90struct FileRegion {
91 std::string type; ///< Region type identifier (e.g., "cue", "scene", "block")
92 std::string name; ///< Human-readable name for the region
93 std::vector<uint64_t> start_coordinates; ///< N-dimensional start position (e.g., frame, x, y)
94 std::vector<uint64_t> end_coordinates; ///< N-dimensional end position (inclusive)
95 std::unordered_map<std::string, std::any> attributes; ///< Region-specific metadata
96
97 /**
98 * @brief Convert this FileRegion to a Region for use in processing.
99 * @return Region with equivalent coordinates and attributes.
100 */
102};
103
104/**
105 * @class FileReader
106 * @brief Abstract interface for reading various file formats into containers.
107 *
108 * FileReader provides a type-agnostic interface for loading file data into
109 * the MayaFlux container system. It supports a wide range of structured data:
110 * - Audio files (WAV, MP3, FLAC, etc.)
111 * - Video files (MP4, AVI, MOV, etc.)
112 * - Image sequences or multi-dimensional image data
113 * - Scientific data formats (HDF5, NetCDF, etc.)
114 * - Custom binary formats
115 * - Text-based structured data (JSON, XML, CSV as regions)
116 *
117 * The interface is designed for flexibility, supporting region extraction,
118 * metadata parsing, streaming, and container creation for any data type.
119 */
121public:
122 virtual ~FileReader() = default;
123
124 /**
125 * @brief Check if a file can be read by this reader.
126 * @param filepath Path to the file.
127 * @return true if the file format is supported.
128 */
129 [[nodiscard]] virtual bool can_read(const std::string& filepath) const = 0;
130
131 /**
132 * @brief Open a file for reading.
133 * @param filepath Path to the file.
134 * @param options Reading options (see FileReadOptions).
135 * @return true if file was successfully opened.
136 */
137 virtual bool open(const std::string& filepath, FileReadOptions options = FileReadOptions::ALL) = 0;
138
139 /**
140 * @brief Close the currently open file.
141 */
142 virtual void close() = 0;
143
144 /**
145 * @brief Check if a file is currently open.
146 * @return true if a file is open.
147 */
148 [[nodiscard]] virtual bool is_open() const = 0;
149
150 /**
151 * @brief Get metadata from the open file.
152 * @return File metadata or nullopt if no file is open.
153 */
154 [[nodiscard]] virtual std::optional<FileMetadata> get_metadata() const = 0;
155
156 /**
157 * @brief Get semantic regions from the file.
158 * @return Vector of regions found in the file.
159 *
160 * Regions are format-specific:
161 * - Audio: cues, markers, loops, chapters
162 * - Video: scenes, chapters, keyframes
163 * - Images: layers, selections, annotations
164 * - Data: chunks, blocks, datasets
165 */
166 [[nodiscard]] virtual std::vector<FileRegion> get_regions() const = 0;
167
168 /**
169 * @brief Read all data from the file into memory.
170 * @return DataVariant vector containing the file data.
171 */
172 virtual std::vector<Kakshya::DataVariant> read_all() = 0;
173
174 /**
175 * @brief Read a specific region of data.
176 * @param region Region descriptor.
177 * @return DataVariant vector containing the requested data.
178 */
179 virtual std::vector<Kakshya::DataVariant> read_region(const FileRegion& region) = 0;
180
181 /**
182 * @brief Create and initialize a container from the file.
183 * @return Initialized container appropriate for the file type.
184 *
185 * The specific container type returned depends on the file format:
186 * - Audio files -> SoundFileContainer
187 * - Video files -> VideoContainer (future)
188 * - Image files -> ImageContainer (future)
189 * - Data files -> DataContainer variants
190 */
191 virtual std::shared_ptr<Kakshya::SignalSourceContainer> create_container() = 0;
192
193 /**
194 * @brief Load file data into an existing container.
195 * @param container Target container (must be compatible type).
196 * @return true if successful.
197 */
198 virtual bool load_into_container(std::shared_ptr<Kakshya::SignalSourceContainer> container) = 0;
199
200 /**
201 * @brief Get current read position in primary dimension.
202 * @return Current position (interpretation is format-specific).
203 */
204 [[nodiscard]] virtual std::vector<uint64_t> get_read_position() const = 0;
205
206 /**
207 * @brief Seek to a specific position in the file.
208 * @param position Target position in N-dimensional space.
209 * @return true if seek was successful.
210 */
211 virtual bool seek(const std::vector<uint64_t>& position) = 0;
212
213 /**
214 * @brief Get supported file extensions for this reader.
215 * @return Vector of supported extensions (without dots).
216 */
217 [[nodiscard]] virtual std::vector<std::string> get_supported_extensions() const = 0;
218
219 /**
220 * @brief Get the data type this reader produces.
221 * @return Type info for the data variant content.
222 */
223 [[nodiscard]] virtual std::type_index get_data_type() const = 0;
224
225 /**
226 * @brief Get the container type this reader creates.
227 * @return Type info for the container type.
228 */
229 [[nodiscard]] virtual std::type_index get_container_type() const = 0;
230
231 /**
232 * @brief Get the last error message.
233 * @return Error string or empty if no error.
234 */
235 [[nodiscard]] virtual std::string get_last_error() const = 0;
236
237 /**
238 * @brief Check if streaming is supported for the current file.
239 * @return true if file can be streamed.
240 */
241 [[nodiscard]] virtual bool supports_streaming() const = 0;
242
243 /**
244 * @brief Get the preferred chunk size for streaming.
245 * @return Chunk size in primary dimension units.
246 */
247 [[nodiscard]] virtual uint64_t get_preferred_chunk_size() const = 0;
248
249 /**
250 * @brief Get the dimensionality of the file data.
251 * @return Number of dimensions.
252 */
253 [[nodiscard]] virtual size_t get_num_dimensions() const = 0;
254
255 /**
256 * @brief Get size of each dimension in the file data.
257 * @return Vector of dimension sizes.
258 */
259 [[nodiscard]] virtual std::vector<uint64_t> get_dimension_sizes() const = 0;
260
261protected:
262 /**
263 * @brief Convert file regions to region groups.
264 * @param regions Vector of file regions.
265 * @return Region groups organized by type.
266 *
267 * Groups regions by their type field, producing a map from type to RegionGroup.
268 */
269 static std::unordered_map<std::string, Kakshya::RegionGroup>
270 regions_to_groups(const std::vector<FileRegion>& regions);
271};
272
273// Factory callable stored by FileReaderRegistry: takes no arguments and returns a std::unique_ptr<FileReader>.
274using FileReaderFactory = std::function<std::unique_ptr<FileReader>()>;
275
276/**
277 * @class FileReaderRegistry
278 * @brief Registry for file reader implementations.
279 *
280 * Allows registration of different FileReader implementations
281 * and automatic selection based on file extension or content.
282 */
284public:
285 /**
286 * @brief Get the singleton instance of the registry.
287 */
289 {
290 static FileReaderRegistry registry;
291 return registry;
292 }
293
294 /**
295 * @brief Register a file reader factory for one or more extensions.
296 * @param extensions Supported file extensions (without dots).
297 * @param factory Factory function to create reader.
298 */
299 void register_reader(const std::vector<std::string>& extensions, const FileReaderFactory& factory)
300 {
301 for (const auto& ext : extensions) {
302 m_factories[ext] = factory;
303 }
304 }
305
306 /**
307 * @brief Create appropriate reader for a file based on extension.
308 * @param filepath Path to file.
309 * @return Reader instance or nullptr if no suitable reader.
310 */
311 std::unique_ptr<FileReader> create_reader(const std::string& filepath) const
312 {
313 auto ext = std::filesystem::path(filepath).extension().string();
314 if (!ext.empty() && ext[0] == '.') {
315 ext = ext.substr(1);
316 }
317
318 auto it = m_factories.find(ext);
319 if (it != m_factories.end()) {
320 return it->second();
321 }
322 return nullptr;
323 }
324
325private:
326 std::unordered_map<std::string, FileReaderFactory> m_factories;
327};
328
329} // namespace MayaFlux::IO
std::unique_ptr< FileReader > create_reader(const std::string &filepath) const
Create appropriate reader for a file based on extension.
static FileReaderRegistry & instance()
Get the singleton instance of the registry.
void register_reader(const std::vector< std::string > &extensions, const FileReaderFactory &factory)
Register a file reader factory for one or more extensions.
std::unordered_map< std::string, FileReaderFactory > m_factories
Registry for file reader implementations.
virtual bool open(const std::string &filepath, FileReadOptions options=FileReadOptions::ALL)=0
Open a file for reading.
virtual std::vector< std::string > get_supported_extensions() const =0
Get supported file extensions for this reader.
virtual bool seek(const std::vector< uint64_t > &position)=0
Seek to a specific position in the file.
virtual std::vector< FileRegion > get_regions() const =0
Get semantic regions from the file.
virtual std::type_index get_container_type() const =0
Get the container type this reader creates.
virtual std::vector< uint64_t > get_dimension_sizes() const =0
Get size of each dimension in the file data.
virtual ~FileReader()=default
virtual std::vector< Kakshya::DataVariant > read_all()=0
Read all data from the file into memory.
virtual bool supports_streaming() const =0
Check if streaming is supported for the current file.
virtual size_t get_num_dimensions() const =0
Get the dimensionality of the file data.
virtual uint64_t get_preferred_chunk_size() const =0
Get the preferred chunk size for streaming.
virtual std::vector< Kakshya::DataVariant > read_region(const FileRegion &region)=0
Read a specific region of data.
virtual bool can_read(const std::string &filepath) const =0
Check if a file can be read by this reader.
static std::unordered_map< std::string, Kakshya::RegionGroup > regions_to_groups(const std::vector< FileRegion > &regions)
Convert file regions to region groups.
virtual std::string get_last_error() const =0
Get the last error message.
virtual std::optional< FileMetadata > get_metadata() const =0
Get metadata from the open file.
virtual bool is_open() const =0
Check if a file is currently open.
virtual void close()=0
Close the currently open file.
virtual std::shared_ptr< Kakshya::SignalSourceContainer > create_container()=0
Create and initialize a container from the file.
virtual bool load_into_container(std::shared_ptr< Kakshya::SignalSourceContainer > container)=0
Load file data into an existing container.
virtual std::type_index get_data_type() const =0
Get the data type this reader produces.
virtual std::vector< uint64_t > get_read_position() const =0
Get current read position in primary dimension.
Abstract interface for reading various file formats into containers.
std::function< std::unique_ptr< FileReader >()> FileReaderFactory
FileReadOptions
Generic options for file reading behavior.
@ EXTRACT_METADATA
Extract file metadata.
@ ALL
All options enabled.
@ HIGH_PRECISION
Use highest precision available.
@ EXTRACT_REGIONS
Extract semantic regions (format-specific)
@ NONE
No special options.
@ STREAMING
Enable streaming mode.
@ PARSE_STRUCTURE
Parse internal structure.
@ DECOMPRESS
Decompress if compressed.
@ VERIFY_INTEGRITY
Verify file integrity/checksums.
@ LAZY_LOAD
Don't load all data immediately.
FileReadOptions operator&(FileReadOptions a, FileReadOptions b)
FileReadOptions operator|(FileReadOptions a, FileReadOptions b)
uint64_t file_size
Size in bytes.
std::unordered_map< std::string, std::any > attributes
Type-specific metadata stored as key-value pairs (e.g., sample rate, channels)
std::chrono::system_clock::time_point modification_time
Last modification time.
std::string format
File format identifier (e.g., "wav", "mp3", "hdf5")
std::string mime_type
MIME type if applicable (e.g., "audio/wav")
std::chrono::system_clock::time_point creation_time
File creation time.
std::optional< T > get_attribute(const std::string &key) const
Get a typed attribute value by key.
Generic metadata structure for any file type.
std::vector< uint64_t > start_coordinates
N-dimensional start position (e.g., frame, x, y)
Kakshya::Region to_region() const
Convert this FileRegion to a Region for use in processing.
std::string name
Human-readable name for the region.
std::string type
Region type identifier (e.g., "cue", "scene", "block")
std::unordered_map< std::string, std::any > attributes
Region-specific metadata.
std::vector< uint64_t > end_coordinates
N-dimensional end position (inclusive)
Generic region descriptor for any file type.
Represents a point or span in N-dimensional space.
Definition Region.hpp:67