fix #117, allow auto download with tar format videos · dadwadw233/lmms-eval@8f6e846 (original) (raw)

`@@ -776,7 +776,6 @@ def _download_from_youtube(path):

`

776

776

`if accelerator.is_main_process:

`

777

777

`force_download = dataset_kwargs.get("force_download", False)

`

778

778

`force_unzip = dataset_kwargs.get("force_unzip", False)

`

779

``

`-

print(force_download)

`

780

779

`cache_path = snapshot_download(repo_id=self.DATASET_PATH, repo_type="dataset", force_download=force_download, etag_timeout=60)

`

781

780

`zip_files = glob(os.path.join(cache_path, "**/*.zip"), recursive=True)

`

782

781

`tar_files = glob(os.path.join(cache_path, "**/*.tar*"), recursive=True)

`

`@@ -797,15 +796,11 @@ def untar_video_data(tar_file):

`

797

796

``

798

797

``

799

798

`def concat_tar_parts(tar_parts, output_tar):

`

800

``

`-

print("This is the output file:", output_tar, "from:", tar_parts)

`

801

``

`-

try:

`

802

``

`-

with open(output_tar, 'wb') as out_tar:

`

803

``

`-

from tqdm import tqdm

`

804

``

`-

for part in tqdm(sorted(tar_parts)):

`

805

``

`-

with open(part, 'rb') as part_file:

`

806

``

`-

out_tar.write(part_file.read())

`

807

``

`-

except Exception as ex:

`

808

``

`-

print("Error!!!", ex)

`

``

799

`+

with open(output_tar, 'wb') as out_tar:

`

``

800

`+

from tqdm import tqdm

`

``

801

`+

for part in tqdm(sorted(tar_parts)):

`

``

802

`+

with open(part, 'rb') as part_file:

`

``

803

`+

out_tar.write(part_file.read())

`

809

804

`eval_logger.info(f"Concatenated parts {tar_parts} into {output_tar}")

`

810

805

``

811

806

`# Unzip zip files if needed

`

`@@ -824,7 +819,6 @@ def concat_tar_parts(tar_parts, output_tar):

`

824

819

`tar_parts_dict[base_name] = []

`

825

820

`tar_parts_dict[base_name].append(tar_file)

`

826

821

``

827

``

`-

print(tar_parts_dict)

`

828

822

``

829

823

`# Concatenate and untar split parts

`

830

824

`for base_name, parts in tar_parts_dict.items():

`