ifw-daq 3.1.0
IFW Data Acquisition modules
merge.cpp
/**
 * @file
 * @ingroup daq_dpm_merge
 * @copyright ESO - European Southern Observatory
 */
#include "merge.hpp"

#include <fmt/format.h>
#include <log4cplus/loggingmacros.h>

#include <daq/fits/cfitsio.hpp>

namespace daq::dpm::merge {

void CheckUnmergedPrimaryHduData(log4cplus::Logger const& logger,
                                 StatusReporter& status_reporter,
                                 FitsFileSource const& source) {
    int status = 0;
    int type = 0;
    fitsfile* file = source.GetFitsFile();

    // Select primary HDU (1)
    (void)fits_movabs_hdu(file, 1, &type, &status);
    // As the primary HDU must be empty or contain an image (and not tables) we just check the
    // image axis dimensions.
    auto const max_dim = 10;
    long naxes[max_dim] = {0};
    int naxis = 0;
    fits_get_img_dim(file, &naxis, &status);
    fits_get_img_size(file, max_dim, naxes, &status);
    if (status != 0) {
        // Non-conforming FITS -> ignore.
        return;
    }

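    // Clamp to the number of axes queried above so the scan below stays within naxes[];
    // any non-zero axis length means the primary HDU carries data.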
    naxis = naxis < max_dim ? naxis : max_dim;
    bool has_data = false;
    for (auto axis = 0; axis < naxis; ++axis) {
        if (naxes[axis] > 0u) {
            // non-empty axis
            has_data = true;
            break;
        }
    }

    if (has_data) {
        auto dim_str = std::string();
        for (auto axis = 0; axis < naxis; ++axis) {
            if (naxes[axis] > 0u) {
                auto dim = naxes[axis];
                if (axis > 0) {
                    dim_str += fmt::format(", {}", dim);
                } else {
                    dim_str += fmt::format("{}", dim);
                }
            }
        }
        auto msg = fmt::format(
            "Unmerged data: Source file has data in primary HDU that cannot be merged: '{}' "
            "produced file '{}' that was copied from '{}' has dimensions ({})",
            source.GetName(),
            source.GetFilePath().c_str(),
            source.GetLocation(),
            dim_str);
        LOG4CPLUS_WARN(logger, msg);
        if (source.AlertUnmergeable()) {
            status_reporter.PostAlert(
                fmt::format(
                    "unmerged-{}-{}", source.GetName(), source.GetFilePath().filename().native()),
                msg);
        }
    }
}

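/**
 * Compile keywords from an inline (JSON) keyword source by applying the source's keyword
 * rules; by default only user keywords are kept.
 */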
fits::KeywordVector CompileKeywords(Operations ops, FitsKeywordsSource const& source) {
    try {
        return source.GetKeywordRuleProcessor().Process(
            source.GetKeywords(),
            source.GetInitialKeywords().value_or(KeywordRuleProcessor::DefaultRule::User));
    } catch (...) {
        auto msg = fmt::format("Processing keyword rules failed for JSON keywords source '{}'",
                               source.GetName());
        LOG4CPLUS_ERROR(ops.logger, msg);
        std::throw_with_nested(std::runtime_error(msg));
    }
}

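/**
 * Compile keywords from a FITS file source: read the literal keywords of HDU hdu_num and
 * apply the source's keyword rules (user keywords are kept by default).
 */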
fits::KeywordVector CompileKeywords(Operations ops, FitsFileSource const& source, int hdu_num) {
    auto logger = log4cplus::Logger::getInstance("daq.dpmmerge");
    auto literal_kws = fits::ReadKeywords(source.GetFitsFile(), hdu_num);
    fits::KeywordVector result;
    result.reserve(literal_kws.size());
    std::copy(std::make_move_iterator(literal_kws.begin()),
              std::make_move_iterator(literal_kws.end()),
              std::back_inserter(result));

    try {
        return source.GetKeywordRuleProcessor().Process(
            result, source.GetInitialKeywords().value_or(KeywordRuleProcessor::DefaultRule::User));
    } catch (...) {
        auto msg = fmt::format("Processing keyword rules failed for FITS file keywords source '{}'",
                               source.GetFilePath().string());
        LOG4CPLUS_ERROR(logger, msg);
        std::throw_with_nested(std::runtime_error(msg));
    }
}

/**
 * Compiles unique keywords and formats primary HDU keywords.
 */
fits::KeywordVector CompilePrimaryHduKeywords(Operations ops,
                                              TargetSource& target,
                                              std::vector<SourceTypes> const& sources) {
    auto logger = log4cplus::Logger::getInstance("daq.dpmmerge");
    constexpr const int primary_hdu_num = 1;
    fits::KeywordVector result;
    auto literal_kws = fits::ReadKeywords(target.GetFitsFile(), primary_hdu_num);
    LOG4CPLUS_DEBUG(logger, "Read keywords from " << target.GetFilePath());
    std::for_each(std::begin(literal_kws), std::end(literal_kws), [&](auto const& kw) {
        LOG4CPLUS_DEBUG(logger, kw);
    });
    result.reserve(literal_kws.size());
    std::copy(std::begin(literal_kws), std::end(literal_kws), std::back_inserter(result));
    // Filter
    try {
        // note: By default we keep all keywords in the target
        result = target.GetKeywordRuleProcessor().Process(
            result, target.GetInitialKeywords().value_or(KeywordRuleProcessor::DefaultRule::All));
        LOG4CPLUS_DEBUG(logger, "Result after keyword processing: " << target.GetFilePath());
        std::for_each(std::begin(result), std::end(result), [&](auto const& kw) {
            LOG4CPLUS_DEBUG(logger, kw);
        });
    } catch (...) {
        auto msg = fmt::format("Processing keyword rules failed for target FITS file '{}'",
                               target.GetFilePath().string());
        LOG4CPLUS_ERROR(logger, msg);
        std::throw_with_nested(std::runtime_error(msg));
    }

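    // Merge the keywords from each source in declaration order; std::visit dispatches on the
    // concrete source type (inline keyword list vs. FITS file).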
    for (auto const& source : sources) {
        std::string path_or_name;
        auto kws = std::visit(
            [&](auto const& source) -> fits::KeywordVector {
                using T = std::decay_t<decltype(source)>;
                if constexpr (std::is_same_v<T, FitsKeywordsSource>) {
                    path_or_name = fmt::format("{}: (keyword list)", source.GetName());
                    return CompileKeywords(ops, source);
                } else if constexpr (std::is_same_v<T, FitsFileSource>) {
                    path_or_name =
                        fmt::format("{}: {}", source.GetName(), source.GetFilePath().native());
                    return CompileKeywords(ops, source, 1);
                }
            },
            source);
        LOG4CPLUS_DEBUG(logger, "Updating with keywords from : " << path_or_name);
        std::for_each(
            std::begin(kws), std::end(kws), [&](auto const& kw) { LOG4CPLUS_DEBUG(logger, kw); });
        // Don't overwrite conflicting keywords
        fits::UpdateKeywords(result, kws, fits::ConflictPolicy::Skip);
    }

    return result;
}

std::vector<fits::LiteralKeyword> FormatKeywords(fits::KeywordVector::const_iterator begin,
                                                 fits::KeywordVector::const_iterator end,
                                                 KeywordFormatter& fmt) {
    auto const& logger = log4cplus::Logger::getInstance("daq.dpmmerge");
    std::vector<fits::LiteralKeyword> result;
    LOG4CPLUS_DEBUG(logger, "Formatting keywords ...");
    std::transform(
        begin, end, std::back_inserter(result), [&](auto const& kw) -> fits::LiteralKeyword {
            LOG4CPLUS_DEBUG(logger, "Formatting keyword input: \"" << kw << "\"");
            auto formatted = fmt.FormatKeyword(kw);
            LOG4CPLUS_DEBUG(logger, "Formatting keyword result: \"" << formatted << "\"");
            return formatted;
        });
    LOG4CPLUS_DEBUG(logger, "Formatting keywords done.");
    return result;
}

template <class Container>
void LogKeywords(log4cplus::Logger const& logger, Container const& keywords) {
    std::for_each(std::begin(keywords), std::end(keywords), [&](auto const& kw) {
        LOG4CPLUS_DEBUG(logger, kw);
    });
}

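/**
 * Merge primary HDU keywords from all sources into the target: compile, format, sort and
 * write them back, adding the mandatory ORIGFILE and ARCFILE keywords.
 */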
void MergePrimaryHduKeywords(Operations ops,
                             Params const& params,
                             TargetSource& target,
                             std::vector<SourceTypes> const& sources,
                             bool dry_run) {
    auto const& logger = ops.logger;
    LOG4CPLUS_INFO(logger, "Merge primary HDU keywords");
    LOG4CPLUS_INFO(logger, "Compile primary keywords");
    constexpr const int primary_hdu_num = 1;
    auto primary_hdu_keywords = CompilePrimaryHduKeywords(ops, target, sources);

    // Add ORIGFILE and ARCFILE last (will be sorted to be last of the value keywords)
    fits::KeywordVector mandatory;
    mandatory.emplace_back(std::in_place_type<fits::ValueKeyword>, "ORIGFILE", params.origfile);
    mandatory.emplace_back(std::in_place_type<fits::ValueKeyword>, "ARCFILE", params.arcfile);
    fits::InsertKeywords(
        primary_hdu_keywords, primary_hdu_keywords.end(), mandatory.begin(), mandatory.end());

    LOG4CPLUS_INFO(logger, "Format keywords");
    auto formatted = FormatKeywords(
        std::begin(primary_hdu_keywords), std::end(primary_hdu_keywords), ops.keyword_formatter);
    {
        LOG4CPLUS_INFO(logger, "Sort keywords");
        ops.keyword_sorter.SortKeywords(formatted);
        LOG4CPLUS_DEBUG(logger, "Sorted keywords");
        LogKeywords(logger, formatted);
    }

    // Write keywords back.
    // WriteKeywords will make room as necessary.
    if (!dry_run) {
        LOG4CPLUS_INFO(logger, "Clear keywords to make room for writing back sorted keywords.");
        fits::DeleteAllKeywords(target.GetFitsFile(), primary_hdu_num);
        LOG4CPLUS_INFO(logger, "Writing keywords");
        std::optional<ssize_t> remaining_size;
        fits::WriteKeywords(target.GetFitsFile(), primary_hdu_num, formatted, &remaining_size);
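        // WriteKeywords reports the remaining keyword space in the primary HDU header; a negative
        // value means the header had to grow (shifting the data section), which is reported below.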
        if (remaining_size) {
            if (*remaining_size < 0) {
                auto needed = -*remaining_size;
                // Reallocation occurred
                auto msg = fmt::format(
                    "Writing keywords required resizing of primary HDU: Add space for at least "
                    "{} keywords to avoid resize",
                    needed);
                ops.status_reporter.PostAlert("primary_hdu_resize", msg);
            } else {
                auto msg = fmt::format("Primary HDU keyword space remaining: {}", *remaining_size);
                LOG4CPLUS_INFO(logger, msg);
            }
        }
    } else {
        LOG4CPLUS_INFO(logger, "Writing keywords SKIPPED (dry-run)");
    }
}

/**
 * Copy all extensions from source to target
 */
void CopyExtensions(Operations ops,
                    TargetSource& target,
                    FitsFileSource const& source,
                    bool dry_run) {
    auto const& logger = ops.logger;
    LOG4CPLUS_INFO(
        logger,
        "Merging HDU extensions from " << source.GetName() << "(" << source.GetFilePath() << ")");

    // Check for unmerged primary HDU data.
    CheckUnmergedPrimaryHduData(logger, ops.status_reporter, source);

    int status = 0;
    fitsfile* source_fits = source.GetFitsFile();
    fitsfile* target_fits = target.GetFitsFile();

    int num_hdus = 0;
    fits_get_num_hdus(source_fits, &num_hdus, &status);
    if (status != 0) {
        auto msg =
            fmt::format("Failed to get number of HDUs from '{}'", source.GetFilePath().c_str());
        LOG4CPLUS_ERROR(logger, msg);
        throw fits::CfitsioError(status, msg);
    }

    if (num_hdus == 1) {
        // @todo There should be a proper check of the first HDU to see that it does not contain
        //       any data!
        LOG4CPLUS_INFO(ops.logger,
                       "Note: No HDU extensions to merge from " << source.GetFilePath());
        return;
    }

    // Select HDU 2 to copy
    fits::SelectHduNum(source_fits, 2);
    if (!dry_run) {
        // @todo Execute HDU by HDU to allow aborting in-between?
        int previous = false;
        int current = true;
        int following = true;
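        // fits_copy_file copies HDUs relative to the current HDU of the source (selected as
        // HDU 2 above): previous=false skips HDU 1, current/following=true copy HDU 2 and
        // everything after it, i.e. all extensions.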
        fits_copy_file(source_fits, target_fits, previous, current, following, &status);
        if (status != 0) {
            auto const msg = "FITS function fits_copy_file failed";
            LOG4CPLUS_ERROR(logger, msg);
            throw fits::CfitsioError(status, msg);
        }
    } else {
        LOG4CPLUS_INFO(logger,
                       "Merging HDU extensions from " << source.GetName() << "("
                                                      << source.GetFilePath()
                                                      << ") SKIPPED (dry-run)");
    }
}

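/**
 * Copy the HDU extensions of every FITS file source into the target, in source order.
 */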
void MergeHduExtensions(Operations ops,
                        TargetSource& target,
                        std::vector<SourceTypes> const& sources,
                        bool dry_run) {
    auto const& logger = ops.logger;
    LOG4CPLUS_INFO(logger, "Merging HDU extensions");

    for (auto const& source_var : sources) {
        if (!std::holds_alternative<FitsFileSource>(source_var)) {
            continue;
        }
        FitsFileSource const& source = std::get<FitsFileSource>(source_var);
        try {
            CopyExtensions(ops, target, source, dry_run);
        } catch (...) {
            std::throw_with_nested(
                std::runtime_error(fmt::format("Failed to copy HDU extensions from '{}' to '{}'",
                                               source.GetFilePath().c_str(),
                                               target.GetFilePath().c_str())));
        }
    }
    LOG4CPLUS_INFO(logger, "Merging HDU extensions completed successfully");
}

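/**
 * Write or update the DATASUM and CHECKSUM keywords of every HDU in the target file.
 */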
void UpdateChecksums(Operations ops, TargetSource& target, bool dry_run) {
    auto const& logger = ops.logger;
    LOG4CPLUS_INFO(logger, "Updating checksums for all HDUs");
    int status = 0;
    fitsfile* target_fits = target.GetFitsFile();
    int num_hdus = 0;
    fits_get_num_hdus(target_fits, &num_hdus, &status);
    if (status != 0) {
        auto msg =
            fmt::format("Failed to get number of HDUs from '{}'", target.GetFilePath().c_str());
        LOG4CPLUS_ERROR(logger, msg);
        throw fits::CfitsioError(status, msg);
    }

    if (!dry_run) {
        for (int hdu_num = 1; hdu_num <= num_hdus; ++hdu_num) {
            LOG4CPLUS_DEBUG(logger, "Updating checksum for HDU " << hdu_num);
            fits::WriteChecksum(target_fits, hdu_num);
        }
    } else {
        LOG4CPLUS_INFO(logger, "Updating checksum keywords SKIPPED (dry-run)");
    }
    LOG4CPLUS_INFO(logger, "Updating checksum keywords completed for all HDUs successfully");
}

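/**
 * Merge sources into the target @a target.
 */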
void Merge(Operations ops,
           Params const& params,
           TargetSource& target,
           std::vector<SourceTypes> const& sources,
           bool dry_run) {
    auto const& logger = ops.logger;
    /*
     * 1. Merge target primary HDU keywords.
     *    Since adding keywords to the primary HDU may shift the data section we do this first,
     *    before copying any HDU extensions.
     *    1. Compile keywords to be merged into the target primary HDU.
     *    2. Format and validate keywords.
     *       - Already formatted keywords (literal keywords) are compared with the expected format.
     *       - Unformatted keywords are formatted using the dictionary format string.
     *    3. Sort keywords.
     *    4. Delete all existing keywords.
     *    5. Write keywords cleanly (remove all, then write back).
     * 2. Copy HDU extensions to the target.
     */
    // @todo Validate that there is no primary HDU data in any of the sources before starting
    //       the process.
    LOG4CPLUS_INFO(logger, "Starting merge operation");
    try {
        MergePrimaryHduKeywords(ops, params, target, sources, dry_run);
    } catch (...) {
        std::throw_with_nested(std::runtime_error("Failed to merge primary HDU keywords"));
    }

    try {
        MergeHduExtensions(ops, target, sources, dry_run);
    } catch (...) {
        std::throw_with_nested(std::runtime_error("Failed to merge HDU extensions"));
    }
    try {
        UpdateChecksums(ops, target, dry_run);
    } catch (...) {
        std::throw_with_nested(std::runtime_error("Failed to update checksums"));
    }

    LOG4CPLUS_INFO(logger, "Completed successfully");
}

} // namespace daq::dpm::merge