Last active 1 month ago

shuaizhou revised this gist 1 month ago. Go to revision

1 file changed, 28 insertions

ncbi_genome_download.sh

@@ -15,4 +15,32 @@ for f in batches/batch_*; do
15 15
16 16 # 关键:主动降速,避免触发 NCBI 限制
17 17 sleep 10
18 + done
19 +
20 +
#!/usr/bin/env bash
#
# Download NCBI genome packages batch by batch and unpack each archive.
#
# Every file matching batches/batch_* is passed to
# `datasets download genome accession --inputfile`. The archive is written to
# zips/<batch>.zip and unpacked into genomes/<batch>/. A batch whose
# genomes/<batch>/ directory already exists is treated as finished and is
# skipped, so the script can simply be re-run to resume.
#
# Exit status: 0 when every batch is present or was fetched, 1 otherwise.

set -uo pipefail

#######################################
# Download and unpack a single batch.
# Arguments: $1 - path to a batch file (list of accessions)
# Outputs:   progress on stdout, errors on stderr
# Returns:   0 on success, 1 if the download or the extraction failed
#######################################
fetch_batch() {
  local batch_file=$1
  local base=${batch_file##*/}
  local out="zips/${base}.zip"
  local dir="genomes/${base}"
  local staging="${dir}.partial"

  echo "Downloading $batch_file -> $out"
  if ! datasets download genome accession --inputfile "$batch_file" \
    --include genome,gff3,gbff \
    --filename "$out"; then
    echo "ERROR: download failed for $batch_file" >&2
    # Drop whatever was written so a truncated archive is never unpacked.
    rm -f -- "$out"
    return 1
  fi

  # Unpack into a staging directory and rename it only on success. The final
  # directory is what marks a batch as done, so it must never exist for a
  # batch that failed or was interrupted half-way.
  rm -rf -- "$staging"
  mkdir -p -- "$staging" || return 1
  if ! unzip -q "$out" -d "$staging"; then
    echo "ERROR: could not unpack $out" >&2
    rm -rf -- "$staging"
    return 1
  fi
  mv -- "$staging" "$dir"
}

#######################################
# Process every batch file, skipping the ones already unpacked.
# Returns: 0 if no batch failed, 1 otherwise
#######################################
main() {
  local f dir failed=0
  local -a batches

  mkdir -p zips genomes || return 1

  # nullglob: an unmatched pattern expands to nothing instead of the literal
  # string "batches/batch_*".
  shopt -s nullglob
  batches=(batches/batch_*)
  shopt -u nullglob

  if ((${#batches[@]} == 0)); then
    echo "No batch files found (expected batches/batch_*)" >&2
    return 0
  fi

  for f in "${batches[@]}"; do
    dir="genomes/${f##*/}"

    # An existing output directory means this batch is already complete.
    if [[ -d "$dir" ]]; then
      echo "SKIP (exists): $dir"
      continue
    fi

    fetch_batch "$f" || failed=1

    # Throttle on purpose to avoid hitting NCBI rate limits.
    sleep 10
  done

  return "$failed"
}

main "$@"

shuaizhou revised this gist 1 month ago. Go to revision

1 file changed, 2 insertions

ncbi_genome_download.sh

@@ -1,3 +1,5 @@
# One-time setup: create the conda environment "ncbi_dl" with the packages
# ncbi-datasets-cli and jq from conda-forge. The environment is created only
# when `conda env list` does not already show it, so re-running this script
# does not try to create (or replace) an existing environment.
# NOTE(review): nothing visible here activates the environment before
# `datasets` is called below - confirm the caller activates it.
if ! conda env list | awk '$1 == "ncbi_dl" { found = 1 } END { exit !found }'; then
  conda create -n ncbi_dl -y -c conda-forge ncbi-datasets-cli jq
fi
2 +
1 3 for f in batches/batch_*; do
2 4 base=$(basename "$f")
3 5 out="zips/${base}.zip"

shuaizhou revised this gist 1 month ago. Go to revision

1 file changed, 16 insertions

ncbi_genome_download.sh (file created)

@@ -0,0 +1,16 @@
# Download NCBI genome packages batch by batch (run with bash).
#
# Every file matching batches/batch_* is passed to
# `datasets download genome accession --inputfile`; the archive is written to
# zips/<batch>.zip and unpacked into genomes/<batch>/.

#######################################
# Download and unpack a single batch.
# Arguments: $1 - path to a batch file (list of accessions)
# Outputs:   progress on stdout, errors on stderr
# Returns:   0 on success, 1 if the download or the extraction failed
#######################################
download_batch() {
  local batch_file=$1
  local base=${batch_file##*/}
  local out="zips/${base}.zip"

  echo "Downloading $batch_file -> $out"
  if ! datasets download genome accession --inputfile "$batch_file" \
    --include genome,gff3,gbff \
    --filename "$out"; then
    # Do not create the output directory or unpack anything for a batch
    # whose download failed.
    echo "ERROR: download failed for $batch_file" >&2
    return 1
  fi

  # Unpack into a per-batch directory (could also be done once at the end).
  mkdir -p "genomes/$base" || return 1
  if ! unzip -q "$out" -d "genomes/$base"; then
    echo "ERROR: could not unpack $out" >&2
    return 1
  fi
}

#######################################
# Process every batch file in turn.
# Returns: 0 if no batch failed, 1 otherwise
#######################################
download_all_batches() {
  local f failed=0
  local -a batches

  # The archives are written below zips/, so it has to exist first.
  mkdir -p zips || return 1

  # nullglob: an unmatched pattern expands to nothing instead of the literal
  # string "batches/batch_*".
  shopt -s nullglob
  batches=(batches/batch_*)
  shopt -u nullglob

  if ((${#batches[@]} == 0)); then
    echo "No batch files found (expected batches/batch_*)" >&2
    return 0
  fi

  for f in "${batches[@]}"; do
    download_batch "$f" || failed=1

    # Important: throttle on purpose to avoid hitting NCBI rate limits.
    sleep 10
  done

  return "$failed"
}

download_all_batches
Newer Older