fix #117, allow auto download with tar format videos · dadwadw233/lmms-eval@8f6e846 (original) (raw)
`@@ -776,7 +776,6 @@ def _download_from_youtube(path):
`
776
776
`if accelerator.is_main_process:
`
777
777
`force_download = dataset_kwargs.get("force_download", False)
`
778
778
`force_unzip = dataset_kwargs.get("force_unzip", False)
`
779
``
`-
print(force_download)
`
780
779
`cache_path = snapshot_download(repo_id=self.DATASET_PATH, repo_type="dataset", force_download=force_download, etag_timeout=60)
`
781
780
`zip_files = glob(os.path.join(cache_path, "**/*.zip"), recursive=True)
`
782
781
`tar_files = glob(os.path.join(cache_path, "**/*.tar*"), recursive=True)
`
`@@ -797,15 +796,11 @@ def untar_video_data(tar_file):
`
797
796
``
798
797
``
799
798
`def concat_tar_parts(tar_parts, output_tar):
`
800
``
`-
print("This is the output file:", output_tar, "from:", tar_parts)
`
801
``
`-
try:
`
802
``
`-
with open(output_tar, 'wb') as out_tar:
`
803
``
`-
from tqdm import tqdm
`
804
``
`-
for part in tqdm(sorted(tar_parts)):
`
805
``
`-
with open(part, 'rb') as part_file:
`
806
``
`-
out_tar.write(part_file.read())
`
807
``
`-
except Exception as ex:
`
808
``
`-
print("Error!!!", ex)
`
``
799
`+
with open(output_tar, 'wb') as out_tar:
`
``
800
`+
from tqdm import tqdm
`
``
801
`+
for part in tqdm(sorted(tar_parts)):
`
``
802
`+
with open(part, 'rb') as part_file:
`
``
803
`+
out_tar.write(part_file.read())
`
809
804
`eval_logger.info(f"Concatenated parts {tar_parts} into {output_tar}")
`
810
805
``
811
806
`# Unzip zip files if needed
`
`@@ -824,7 +819,6 @@ def concat_tar_parts(tar_parts, output_tar):
`
824
819
`tar_parts_dict[base_name] = []
`
825
820
`tar_parts_dict[base_name].append(tar_file)
`
826
821
``
827
``
`-
print(tar_parts_dict)
`
828
822
``
829
823
`# Concatenate and untar split parts
`
830
824
`for base_name, parts in tar_parts_dict.items():
`