# Last active 1 month ago

# ncbi_genome_download.sh (raw)
# Create the conda environment "ncbi_dl" holding the tools used below:
# ncbi-datasets-cli and jq, both taken from the conda-forge channel.
# --yes answers conda's confirmation prompts automatically.
conda create \
  --name ncbi_dl \
  --yes \
  --channel conda-forge \
  ncbi-datasets-cli jq

# Download every accession list under batches/ and unpack each archive.
#
# For each file matching batches/batch_*, runs
# `datasets download genome accession --inputfile <file>` requesting the
# genome, gff3 and gbff data, stores the archive as zips/<batch>.zip and
# extracts it into genomes/<batch>/ (one directory per batch; extraction
# could equally be done in a single pass at the end).
#
# Environment:
#   NCBI_DL_PAUSE  seconds to sleep after each batch (default: 10)
# Outputs:
#   progress on stdout, error messages on stderr
# Returns:
#   0 if every batch was downloaded and extracted, 1 otherwise
download_all_batches() {
  local pause=${NCBI_DL_PAUSE:-10}
  local f base out failed=0

  # The archive is written under zips/, so that directory has to exist first.
  mkdir -p zips genomes || return 1

  for f in batches/batch_*; do
    # An unmatched glob is left as the literal pattern; do not treat it as a
    # batch file.
    [[ -e "$f" ]] || continue

    base=$(basename "$f")
    out="zips/${base}.zip"

    echo "Downloading $f -> $out"
    if ! datasets download genome accession --inputfile "$f" \
      --include genome,gff3,gbff \
      --filename "$out"; then
      # No archive to unpack: do not create an empty genomes/<batch> directory.
      echo "ERROR: download failed for $f" >&2
      failed=$((failed + 1))
    elif ! { mkdir -p "genomes/$base" && unzip -q "$out" -d "genomes/$base"; }; then
      echo "ERROR: could not extract $out" >&2
      failed=$((failed + 1))
    fi

    # Important: throttle on purpose so NCBI's request limits are not triggered.
    sleep "$pause"
  done

  if ((failed > 0)); then
    echo "ERROR: $failed batch(es) failed" >&2
    return 1
  fi
}

download_all_batches


#!/usr/bin/env bash
#
# Resumable batch download of NCBI genome packages.
#
# Every file matching batches/batch_* is handed to
# `datasets download genome accession --inputfile`; the archive is saved as
# zips/<batch>.zip and unpacked into genomes/<batch>/. A batch whose
# genomes/<batch>/ directory already exists is considered finished and is
# skipped, so the script can simply be re-run after an interruption.
#
# Environment:
#   NCBI_DL_PAUSE  seconds to sleep after each processed batch (default: 10)
# Exit status:
#   0 if every batch was skipped or completed, 1 if any batch failed

set -euo pipefail

#######################################
# Download and unpack a single batch.
# The archive is extracted into genomes/<batch>.partial and renamed to
# genomes/<batch> only after unzip succeeds, so the final directory exists
# only for batches that completed.
# Arguments: $1 - path of the accession list file
# Outputs:   progress on stdout, errors on stderr
# Returns:   0 on success, 1 if the download or the extraction failed
#######################################
fetch_batch() {
  local list=$1
  local base=${list##*/}
  local out="zips/${base}.zip"
  local dir="genomes/${base}"
  local staging="${dir}.partial"

  echo "Downloading $list -> $out"
  if ! datasets download genome accession --inputfile "$list" \
    --include genome,gff3,gbff \
    --filename "$out"; then
    echo "ERROR: download failed for $list" >&2
    return 1
  fi

  # Discard leftovers of an earlier interrupted extraction before starting.
  rm -rf -- "${staging:?}" || return 1
  mkdir -p -- "$staging" || return 1
  if ! unzip -q "$out" -d "$staging"; then
    echo "ERROR: could not extract $out" >&2
    rm -rf -- "${staging:?}"
    return 1
  fi

  mv -- "$staging" "$dir"
}

#######################################
# Process all batches, skipping the ones that are already unpacked.
# Outputs:   progress on stdout, warnings and errors on stderr
# Returns:   0 if no batch failed, 1 otherwise
#######################################
main() {
  local pause=${NCBI_DL_PAUSE:-10}
  local list base
  local seen=0 failed=0

  mkdir -p zips genomes || return 1

  for list in batches/batch_*; do
    # An unmatched glob is left as the literal pattern; ignore it.
    [[ -e "$list" ]] || continue
    seen=$((seen + 1))
    base=${list##*/}

    # An existing extraction directory marks the batch as done: skip it.
    if [[ -d "genomes/$base" ]]; then
      echo "SKIP (exists): genomes/$base"
      continue
    fi

    if ! fetch_batch "$list"; then
      failed=$((failed + 1))
    fi

    # Throttle on purpose so NCBI's request limits are not triggered.
    sleep "$pause"
  done

  if ((seen == 0)); then
    echo "WARNING: no files match batches/batch_*" >&2
  fi
  if ((failed > 0)); then
    echo "ERROR: $failed batch(es) failed; re-run the script to retry them" >&2
    return 1
  fi
}

main "$@"