File size: 1,168 Bytes
3133b5e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 |
import os
from typing import List
from urllib.request import urlretrieve
from zipfile import ZipFile
import pandas as pd
from src.utils.logging_utils import get_pylogger
log = get_pylogger(__name__)
def filter_dataframe_and_get_column(
    dataframe: pd.DataFrame, filter_column: str, filter_value: str, select_column: str
) -> List[str]:
    """Return the values of one column for the rows that match a filter.

    Args:
        dataframe: Table to select from.
        filter_column: Name of the column compared against ``filter_value``.
        filter_value: Value a row must equal in ``filter_column`` to be kept.
        select_column: Name of the column whose values are returned.

    Returns:
        The ``select_column`` entries of the matching rows as a plain list.
    """
    # Element-wise comparison yields one boolean per row.
    row_mask = dataframe[filter_column] == filter_value
    matching_rows = dataframe[row_mask]
    selected_values = matching_rows[select_column]
    return selected_values.tolist()
def download_and_unzip(
    url: str, target_path: str, force_download: bool = False, remove_tmp_file: bool = False
) -> None:
    """Download a zip archive over http(s) and extract it into ``target_path``.

    The archive is stored in ``target_path`` under the last path component of
    ``url`` and is reused on later calls unless ``force_download`` is set.

    Args:
        url: Address of the zip file; must start with ``http://`` or ``https://``.
        target_path: Directory that receives the downloaded archive and its
            extracted content. It is created if it does not exist yet.
        force_download: Download again even if the archive file already exists.
        remove_tmp_file: Delete the downloaded archive after extraction.

    Raises:
        ValueError: If ``url`` is not an http(s) address or does not end in a
            file name.
    """
    log.warning(f"download zip file from {url} to {target_path} ...")
    if not url.startswith(("http://", "https://")):
        raise ValueError(f"url needs to point to a http(s) address, but it is: {url}")

    file_name = os.path.basename(url)
    if not file_name:
        # A trailing slash would make the archive path point at the directory itself.
        raise ValueError(f"url needs to end with a file name, but it is: {url}")

    # An empty target_path means "current directory"; makedirs("") would raise.
    if target_path:
        os.makedirs(target_path, exist_ok=True)

    tmp_file = os.path.join(target_path, file_name)
    if os.path.exists(tmp_file) and not force_download:
        log.warning(f"tmp file {tmp_file} already exists, skip downloading {url}")
    else:
        # Download next to the final location and move into place only once
        # complete, so an interrupted transfer never leaves a truncated archive
        # that later calls would mistake for a valid cached download.
        partial_file = f"{tmp_file}.part"
        try:
            urlretrieve(url, partial_file)  # nosec
            os.replace(partial_file, tmp_file)
        finally:
            # Nothing to do after a successful replace; otherwise drop the
            # incomplete file.
            if os.path.exists(partial_file):
                os.remove(partial_file)

    with ZipFile(tmp_file, "r") as zfile:
        zfile.extractall(target_path)

    if remove_tmp_file:
        os.remove(tmp_file)
|