
API Reference

cli

Command line interface.

Provides access to basic functionality in the API from the command line.

main()

Entry point for the CLI.

This is called from project.scripts in pyproject.toml

Source code in caroline_download/cli.py
def main():
    """Entry point for the CLI.

    This is called from project.scripts in pyproject.toml

    """
    # Parse arguments and store result in args variable
    args = parse_args()

    # Build configuration
    config = get_config(args=args)

    # Setup logging
    logger = setup_logging(log_config=config.logging)

    logger.info(
        f"Starting {PROGRAM_NAME}"
        f" v{importlib.metadata.version('caroline-download')}"
    )
    logger.debug(f"Configuration: {config}")

    download(
        download_config=config.download,
        geo_search=config.geo_search,
        product_search=config.product_search,
    )

parse_args()

Parse command line arguments.

Returns:

    Namespace: the parsed arguments

Source code in caroline_download/cli.py
def parse_args():
    """Parse command line arguments.

    Returns
    -------
    argparse.Namespace
        the parsed arguments
    """
    # Create argument parser
    parser = argparse.ArgumentParser(
        prog=PROGRAM_NAME,
        description=f"{DESCRIPTION}",
        epilog=f"Author: {AUTHOR} <{AUTHOR_EMAIL}>",
    )

    # Add arguments to argument parser
    parser.add_argument(
        "--config",
        help="configuration file to use",
    )
    parser.add_argument(
        "--geo-search",
        help="download based on geo search",
    )
    parser.add_argument("--product-search", help="download a single product")
    parser.add_argument(
        "--force",
        action="store_true",
        help="force downloading, even if a product already exists locally",
    )
    parser.add_argument(
        "--verify", action="store_true", help="verify checksum after downloading"
    )
    parser.add_argument(
        "--dry-run",
        action="store_true",
        help="perform dry run. do not actually download anything",
    )
    parser.add_argument("--log-file", help="log to LOG_FILE")
    parser.add_argument("--log-level", help="set log level")
    parser.add_argument(
        "--quiet", action="store_true", help="do not log anything to stderr"
    )

    return parser.parse_args()
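
A minimal usage sketch (argument values are hypothetical): parse_args() reads sys.argv directly, so simulating a command line before the call shows which Namespace attributes come back. The option names are the ones registered above; the product name is made up.

import sys

from caroline_download.cli import parse_args

# Hypothetical command line; parse_args() reads sys.argv, so we set it here.
sys.argv = [
    "caroline-download",
    "--config", "caroline-download.yml",
    "--product-search", "S1A_IW_SLC__1SDV_20240102T055049_EXAMPLE",  # made-up product name
    "--dry-run",
]
args = parse_args()

# Namespace attributes mirror the option names:
# config, geo_search, product_search, force, verify, dry_run,
# log_file, log_level, quiet
print(args.config, args.product_search, args.dry_run)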

setup_logging(log_config)

Set up logging.

Parameters:

    log_config: logging configuration object (required)
Source code in caroline_download/cli.py
def setup_logging(log_config):
    """Set up logging.

    Parameters
    ----------
    log_config: Logging
        logging configuration object

    """
    console_log = logging.StreamHandler(sys.stdout)
    console_log_format = logging.Formatter(log_config.console_log.format)
    console_log.setLevel(log_config.console_log.level.value)
    console_log.setFormatter(console_log_format)

    if log_config.file_log.file:
        try:
            file_log = TimedRotatingFileHandler(
                log_config.file_log.file, when="midnight", backupCount=31
            )
            file_log_format = logging.Formatter(log_config.file_log.format)
            file_log.setLevel(log_config.file_log.level.value)
            file_log.setFormatter(file_log_format)
        except Exception as err:
            # Abort if we cannot create the log file as requested
            print(
                "Failed to create log file. " + f"Reason: {err}. Aborting.",
                file=sys.stderr,
            )
            sys.exit(1)

    root_logger = logging.getLogger()
    root_logger.setLevel(log_config.root_logger.level.value)
    root_logger.addHandler(console_log)
    if log_config.file_log.file:
        root_logger.addHandler(file_log)

    cli_logger = logging.getLogger(PROGRAM_NAME)
    cli_logger.setLevel(log_config.cli_logger.level.value)
    cli_logger.propagate = False
    cli_logger.addHandler(console_log)
    if log_config.file_log.file:
        cli_logger.addHandler(file_log)

    download_logger = logging.getLogger("caroline_download.download")
    download_logger.setLevel(log_config.download_logger.level.value)
    download_logger.propagate = False
    download_logger.addHandler(console_log)
    if log_config.file_log.file:
        download_logger.addHandler(file_log)

    asf_logger = logging.getLogger("asf-search")
    asf_logger.setLevel(log_config.asf_logger.level.value)
    asf_logger.propagate = False
    asf_logger.addHandler(console_log)
    if log_config.file_log.file:
        asf_logger.addHandler(file_log)

    return cli_logger
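
A minimal sketch of calling setup_logging, assuming the default Logging dataclass from caroline_download.config (console handler only, no file log) and that LogLevel has a DEBUG member, as the --log-level handling in get_config suggests.

from caroline_download.cli import setup_logging
from caroline_download.config import Logging, LogLevel

# Default logging configuration: console handler enabled, no file log.
log_config = Logging()
# Raise the console verbosity (assumes LogLevel has a DEBUG member).
log_config.console_log.level = LogLevel["DEBUG"]

logger = setup_logging(log_config=log_config)
logger.info("console logging configured")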

config

Handle configuration for the CLI.

Implementation based on suggestions from: https://tech.preferred.jp/en/blog/working-with-configuration-in-python/

Config(download, geo_search, product_search, logging=Logging()) dataclass

Main configuration data class.

ConsoleLog(enable=True, level=LogLevel[DEFAULT_LOG_LEVEL], format=DEFAULT_LOG_FORMAT) dataclass

Data class for console log handler configuration.

Download(base_directory, force=False, dry_run=False, verify=True) dataclass

Data class for download configuration.

FileLog(file, level=LogLevel[DEFAULT_LOG_LEVEL], format=DEFAULT_LOG_FORMAT) dataclass

Data class for file log handler configuration.

GeoSearch(dataset, start, end, roi_wkt_file, relative_orbits, product_type) dataclass

Data class for search configuration.

LogLevel

Bases: Enum

Enum for logging levels.

Logger(level=LogLevel[DEFAULT_LOG_LEVEL]) dataclass

Data class for logger configuration.

Logging(console_log=ConsoleLog(enable=True, level=LogLevel[DEFAULT_LOG_LEVEL], format=DEFAULT_LOG_FORMAT), file_log=FileLog(file=None, level=LogLevel[DEFAULT_LOG_LEVEL], format=DEFAULT_LOG_FORMAT), root_logger=Logger(level=LogLevel['WARNING']), cli_logger=Logger(level=LogLevel[DEFAULT_LOG_LEVEL]), download_logger=Logger(level=LogLevel[DEFAULT_LOG_LEVEL]), asf_logger=Logger(level=LogLevel['WARNING'])) dataclass

Data class for logging configuration.
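
To make the nesting concrete, here is an illustrative sketch (all values hypothetical) of building a Config by hand; get_config() below normally assembles it from a YAML file plus command-line arguments via dacite.

from pathlib import Path

from caroline_download.config import (
    Config,
    Download,
    GeoSearch,
    Logging,
    parse_datetime,
)

# Hypothetical values throughout; field names follow the dataclasses above.
config = Config(
    download=Download(base_directory=Path("/data/sentinel1"), dry_run=True),
    geo_search=GeoSearch(
        dataset="SENTINEL-1",               # dataset name passed to asf.geo_search
        start=parse_datetime("one month ago"),
        end=parse_datetime("now"),
        roi_wkt_file=Path("roi.wkt"),       # WKT file describing the region of interest
        relative_orbits=[88],               # track number(s)
        product_type="SLC",                 # processing level passed to asf.geo_search
    ),
    product_search=None,
    logging=Logging(),
)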

get_config(args)

Get configuration.

Get configuration from files, arguments and defaults.

Source code in caroline_download/config.py
def get_config(args):
    """Get configuration.

    Get configuration from files, arguments and defaults.

    """
    config_dict = {}
    config_file = None

    # Check that either config dir is set in environment or
    # config argument is used
    if not any((os.environ.get("CAROLINE_DOWNLOAD_CONFIG_DIR"), args.config)):
        print("ERROR: No configuration specified. Aborting.", file=sys.stderr)
        sys.exit(1)

    # If CONFIG_DIR is set, set config file to
    # CONFIG_DIR/caroline-download.yml
    if os.environ.get("CAROLINE_DOWNLOAD_CONFIG_DIR"):
        config_file = os.path.join(
            os.environ["CAROLINE_DOWNLOAD_CONFIG_DIR"], "caroline-download.yml"
        )
    # If config argument is used, use that config file instead
    if args.config:
        config_file = args.config

    # Check that the config file exists before we read it
    if not os.path.exists(config_file):
        # Config file does not exist
        print(f"ERROR: File not found: {config_file}", file=sys.stderr)
        sys.exit(1)

    # Open config file and read into config_dict
    with open(config_file, "r") as config_file:
        config_dict = yaml.safe_load(config_file)

    if not any((args.geo_search, args.product_search)):
        print(
            "ERROR: You must use either the --geo-search " + "or the --product option.",
            file=sys.stderr,
        )
        sys.exit(1)

    if args.geo_search:
        # Read yaml specified in argument and merge in config_dict
        if os.path.exists(args.geo_search):
            with open(args.geo_search, "r") as geo_search_file:
                geo_search_dict = yaml.safe_load(geo_search_file)
            config_dict.update(geo_search_dict)
        else:
            print(f"File not found: {args.geo_search}", file=sys.stderr)
            sys.exit(1)

    config = dacite.from_dict(
        data_class=Config, data=config_dict, config=dacite.Config(type_hooks=converters)
    )

    if args.product_search:
        config.product_search = args.product_search

    if args.force:
        config.download.force = True

    if args.dry_run:
        config.download.dry_run = True

    if args.log_file:
        config.logging.file_log.file = args.log_file

    if args.log_level:
        config.logging.console_log.level = LogLevel[args.log_level.upper()]
        config.logging.file_log.level = LogLevel[args.log_level.upper()]
        config.logging.root_logger.level = LogLevel[args.log_level.upper()]
        config.logging.cli_logger.level = LogLevel[args.log_level.upper()]
        config.logging.download_logger.level = LogLevel[args.log_level.upper()]
        config.logging.asf_logger.level = LogLevel[args.log_level.upper()]

    if args.quiet:
        config.logging.console_log.level = LogLevel["NOTSET"]

    if config.geo_search:
        if not config.geo_search.roi_wkt_file.exists():
            print(
                f"ERROR: No such file: {config.geo_search.roi_wkt_file}",
                file=sys.stderr,
            )
            sys.exit(1)

    return config

parse_datetime(datetime)

Parse time(range) specifications.

Allows parsing of human-formatted time(range) specifications such as 'one month ago' into a datetime.

Source code in caroline_download/config.py
def parse_datetime(datetime):
    """Parse time(range) specifications.

    Allows parsing of human formatted time(range)
    specifications such as 'one month ago' to a datetime
    """
    return dateparser.parse(datetime).replace(microsecond=0)
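
Two quick examples of what parse_datetime accepts (the result of the relative phrase depends on when it is run):

from caroline_download.config import parse_datetime

print(parse_datetime("2024-01-15"))      # 2024-01-15 00:00:00
print(parse_datetime("one month ago"))   # relative to the current time, microseconds stripped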

download

Download.

compose_product_download_path(base_directory, file_name, relative_orbit, orbit_direction, polarization)

Compose product download path.

Parameters:

    base_directory: The base download directory (required)
    file_name: The name of the file to download (required)
    relative_orbit: The relative orbit of the product (required)
    orbit_direction: The orbit direction of the product (required)
    polarization: The polarization of the product (required)

Returns:

    str: A string representation of the composed path

Source code in caroline_download/download.py
def compose_product_download_path(
    base_directory, file_name, relative_orbit, orbit_direction, polarization
):
    """Compose product download path.

    Parameters
    ----------
    base_directory: str
        The base download directory
    file_name: str
        The name of the file to download
    relative_orbit: str
        The relative orbit of the product
    orbit_direction: str
        The orbit direction of the product
    polarization: str
        The polarization of the product

    Returns
    -------
    str
        A string representation of the composed path
    """
    # Log debugging information
    logger.debug("base_directory: %s.", base_directory)
    logger.debug("file_name: %s.", file_name)
    logger.debug("relative_orbit: %s.", relative_orbit)
    logger.debug("orbit_direction: %s.", orbit_direction)
    logger.debug("polarization: %s.", polarization)

    # Translate orbit direction from what we get from ASF API to what
    # we use in the path
    orbit_direction = {
        "ASCENDING": "asc",
        "DESCENDING": "dsc",
    }.get(orbit_direction)

    # Pad track number (relative_orbit) with leading zeros when it has
    # fewer than 3 characters
    relative_orbit = relative_orbit.zfill(3)

    # Extract dataset from filename
    dataset = file_name[4:16]

    # Remove '+' from polarization
    polarization = polarization.replace("+", "")

    # Get startdate from filename
    year = file_name[17:21]
    month = file_name[21:23]
    day = file_name[23:25]

    # Construct path from rewritten variables
    path = base_directory
    path = path.joinpath("s1_" + orbit_direction + "_t" + relative_orbit)
    path = path.joinpath(dataset + "_" + polarization)
    path = path.joinpath(year + month + day)

    logger.debug("directory path: %s.", path)

    # Return composed path
    return path
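
A worked sketch with a made-up Sentinel-1 file name, showing how the orbit direction, zero-padded track, dataset slice and acquisition date end up in the path:

from pathlib import Path

from caroline_download.download import compose_product_download_path

# File name and base directory are hypothetical.
path = compose_product_download_path(
    base_directory=Path("/data/sentinel1"),
    file_name="S1A_IW_SLC__1SDV_20240102T055049_20240102T055116_051000_062AAA_0000.zip",
    relative_orbit="88",
    orbit_direction="ASCENDING",
    polarization="VV+VH",
)
print(path)
# /data/sentinel1/s1_asc_t088/IW_SLC__1SDV_VVVH/20240102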

download(download_config, geo_search=None, product_search=None)

Download.

Parameters:

    download_config: download configuration (required)
    geo_search: search configuration (default: None)
    product_search: product name (default: None)
Source code in caroline_download/download.py
def download(download_config, geo_search=None, product_search=None):
    """Download.

    Parameters
    ----------
    download_config:
        download configuration
    geo_search:
        search configuration
    product_search:
        product name
    """
    logger.info("Starting download")
    logger.debug(f"Download configuration: {download_config}")

    if product_search:
        logger.info(f"Performing product search for product {product_search}")
        result = asf.product_search(product_search)
        product_count = len(result)

        if product_count > 1:
            logger.error(
                "Found more than one product while performing "
                "product search. This should not happen according "
                "to the ASF api documentation. Aborting."
            )
            sys.exit(1)

        logger.info(f"Found {str(product_count)} products")
        download_products(download_config, result)

    if geo_search:
        logger.info(f"Performing geo search with {geo_search}")

        # read wkt string from geo_search.roi_wkt_file into var
        with open(geo_search.roi_wkt_file, "r") as wkt_file:
            wkt_str = wkt_file.read().replace("\n", "")

        # validate wkt string using shapely
        # TODO

        # perform search
        # TODO split interval into monthly intervals
        for interval in split_into_monthly_intervals(geo_search.start, geo_search.end):
            result = asf.geo_search(
                dataset=geo_search.dataset,
                start=interval[0],
                end=interval[1],
                intersectsWith=wkt_str,
                relativeOrbit=geo_search.relative_orbits,
                processingLevel=geo_search.product_type,
            )

            product_count = len(result)
            logger.info(f"Found {str(product_count)} products")
            download_products(download_config, result)

    logger.info("Download done")

download_product(download_config, product)

Download a product.

Parameters:

    download_config: download configuration (required)
    product: the product to download (required)
Source code in caroline_download/download.py
def download_product(download_config, product):
    """Download a product.

    Parameters
    ----------
    download_config:
        download configuration
    product:
        the product to download

    """
    target_directory = compose_product_download_path(
        base_directory=download_config.base_directory,
        file_name=product.properties["fileName"],
        relative_orbit=str(product.properties["pathNumber"]),
        orbit_direction=product.properties["flightDirection"],
        polarization=product.properties["polarization"],
    )

    target_file = target_directory.joinpath(product.properties["fileName"])

    logger.debug(f"Target directory: {target_directory}")
    logger.debug(f"Target file: {target_file}")

    if os.path.isfile(target_file) and not download_config.force:
        logger.debug(
            f"Target file: {target_file} already exists. "
            "Force option not set. "
            "Skipping download"
        )
        return

    if os.path.isfile(target_file) and download_config.force:
        logger.debug(
            f"Target file: {target_file} already exists. "
            "Force option set. "
            "Removing file: {target_file}"
        )
        if not download_config.dry_run:
            os.remove(target_file)

    logger.debug("Creating directories")
    if not download_config.dry_run:
        os.makedirs(target_directory, exist_ok=True)

    logger.info(f"Downloading {product.properties['fileName']}")
    if not download_config.dry_run:
        product.download(path=target_directory)

        if download_config.verify:
            logger.info("Verifying checksum")
            if verify_checksum(file=target_file, checksum=product.properties["md5sum"]):
                logger.info("Checksum OK")
            else:
                logger.error("Checksum FAILED")
                return

        product_geojson_file = str(target_file)[:-4] + ".json"
        logger.info(f"Saving product geojson to {product_geojson_file}")
        with open(product_geojson_file, "w") as f:
            f.write(json.dumps(product.geojson(), indent=2))

download_products(download_config, result)

Download products from a result.

Parameters:

    download_config: download configuration (required)
    result: query result (required)
Source code in caroline_download/download.py
def download_products(download_config, result):
    """Download products from a result.

    Parameters
    ----------
    download_config:
        download configuration
    result:
        query result

    """
    for product in result:
        download_product(download_config, product)

split_into_monthly_intervals(start_datetime, end_datetime)

Split interval into monthly intervals.

Parameters:

    start_datetime: start of the interval (required)
    end_datetime: end of the interval (required)

Returns:

    list: A list of intervals

Notes

Intervals are split at month boundaries, so an interval that includes the end of a month is returned as two intervals even if it is shorter than one month.

Source code in caroline_download/download.py
def split_into_monthly_intervals(start_datetime, end_datetime):
    """Split interval into monthly intervals.

    Parameters
    ----------
    start_datetime: datetime
        start of the interval
    end_datetime: datetime
        end of the interval

    Returns
    -------
    list
        A list of intervals

    Notes
    -----
    Intervals are split on the end of the month, so even if the interval is
    smaller than one month but the interval includes the end of a month two
    intervals are returned.
    """
    logger.debug(
        f"Splitting interval {start_datetime} - {end_datetime} "
        "into monthly intervals"
    )
    logger.debug(f"Log level: {logger.level}")
    intervals = []
    current_start = start_datetime

    while current_start < end_datetime:
        # Calculate the start of the next month
        next_month_start = (current_start + relativedelta(months=1)).replace(
            day=1, hour=0, minute=0, second=0, microsecond=0
        )
        # Calculate the end of the current interval (end of current month)
        current_end = min(next_month_start - timedelta(seconds=1), end_datetime)
        intervals.append((current_start, current_end))
        current_start = next_month_start

    logger.debug(f"Returning {len(intervals)} intervals")
    logger.debug(f"{intervals}")
    return intervals
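
A worked sketch: a ten-day interval crossing the January/February boundary comes back as two intervals, split one second before the first of the next month.

from datetime import datetime

from caroline_download.download import split_into_monthly_intervals

intervals = split_into_monthly_intervals(datetime(2024, 1, 25), datetime(2024, 2, 5))
for start, end in intervals:
    print(start, "->", end)
# 2024-01-25 00:00:00 -> 2024-01-31 23:59:59
# 2024-02-01 00:00:00 -> 2024-02-05 00:00:00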

verify_checksum(file, checksum)

Verify checksum of a file.

Parameters:

    file: The file to verify the checksum of (required)
    checksum (str): The checksum to compare against (required)

Returns:

    bool: True if checksum matches, False if it doesn't

Source code in caroline_download/download.py
def verify_checksum(file, checksum: str):
    """Verify checksum of a file.

    Parameters
    ----------
    file:
        The file to verify the checksum of
    checksum: str
        The checksum to compare against

    Returns
    -------
    bool
        True if checksum matches, False if it doesn't
    """
    # Log debugging info
    logger.debug("file: %s.", file)
    logger.debug("original checksum: %s.", checksum)

    # Open the file
    with open(file, "rb") as f:
        # Compute the checksum, chunking the checksum process so as
        # not to fill up memory
        computed_checksum = hashlib.md5()
        chunk = f.read(8192)
        while chunk:
            computed_checksum.update(chunk)
            chunk = f.read(8192)

    # Log debugging info
    logger.debug("computed checksum: %s.", computed_checksum.hexdigest())

    # Compare checksum provided as argument against checksum of file
    if checksum != computed_checksum.hexdigest():
        # Checksum does not match
        return False
    else:
        # Checksum matches
        return True
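
A minimal self-contained sketch (the file name is hypothetical): compute an MD5 digest with hashlib and let verify_checksum confirm it matches.

import hashlib

from caroline_download.download import verify_checksum

# Hypothetical file written just for the example.
with open("example.bin", "wb") as f:
    f.write(b"example payload")

expected = hashlib.md5(b"example payload").hexdigest()
print(verify_checksum(file="example.bin", checksum=expected))  # True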