# Build the subset ("simple_total" and "all_spe") test data sets.
#
# Must be run from the directory containing this script: the helper scripts
# (mk_all_data.sh, subRNE.py, filter_fasta.py, split_fasta.py) and the species
# code lists (peri_spe_code.txt, core_spe_code.txt) are referenced relative to
# the current directory. All generated files are written under ${work_dir}.

# Run the full data preparation step first.
./mk_all_data.sh

## Subset data
work_dir="../../../data/processing_data/Different_species"

# Get the names of the testing data (simple_total).
# Extract every FASTA header (text after ">") from the filtered sequence set.
grep ">" "${work_dir}/RefNoncEsb_all_filtered.fa" | cut -d">" -f2 > "${work_dir}/total_name.txt"

# Randomly pick --num names for each of the 4 kinds of sequence
# (NM, NR, NONCODE, Ensembl). If a set is smaller than --num, the whole set
# is taken. The second call appends (>>) to the list created by the first.
./subRNE.py "${work_dir}/total_name.txt" -k peri_spe_code.txt --num 500 > "${work_dir}/simple_total_name.txt"
./subRNE.py "${work_dir}/total_name.txt" -k core_spe_code.txt --num 2000 >> "${work_dir}/simple_total_name.txt"

# Random pick over all species (no species code list given).
# The name list lives in ${work_dir}, where it was written above.
./subRNE.py "${work_dir}/total_name.txt" --num 25 > "${work_dir}/all_spe_name.txt"

# Get prediction data: keep only the sequences whose names were selected.
# The name lists are read from ${work_dir}, the location they were written to.
./filter_fasta.py "${work_dir}/RefNoncEsb_all_filtered.fa" "${work_dir}/simple_total_name.txt" > "${work_dir}/simple_total.fasta"
./filter_fasta.py "${work_dir}/RefNoncEsb_all_filtered.fa" "${work_dir}/all_spe_name.txt" > "${work_dir}/all_spe.fasta"

# Split for prediction.
# Create the output directory where split_fasta.py is told to write
# (-p: also create missing parents, and do not fail on a re-run).
mkdir -p "${work_dir}/split_simple"
./split_fasta.py "${work_dir}/simple_total.fasta" "${work_dir}/split_simple/simple" 10 --bypart --lth 0