# Build the name lists and FASTA subsets used for prediction, then split the
# "simple_total" subset into parts.
#
# The helper scripts (mk_all_data.sh, subRNE.py, filter_fasta.py,
# split_fasta.py) and the key files (peri_spe_code.txt, core_spe_code.txt)
# are referenced relative to the current directory, so run this script from
# the directory that contains them.
# All generated files are written under ${work_dir}.

# Stop at the first failing command or unset variable so that a failed step
# does not silently feed empty files to the following steps.
set -eu

./mk_all_data.sh

## Subset data
work_dir="../../../data/processing_data/Different_species"
all_fasta="${work_dir}/RefNoncEsb_all_filtered.fa"
split_dir="${work_dir}/split_simple"

# `set -e` does not see a failing first stage of a pipeline, so check the
# input of the grep | cut pipeline below explicitly.
# NOTE(review): presumably this file is produced by mk_all_data.sh -- confirm.
if [ ! -r "${all_fasta}" ]; then
  printf 'error: cannot read %s\n' "${all_fasta}" >&2
  exit 1
fi

# Get names of testing data (simple_total).
# Randomly pick --num names for each of the 4 kinds of sequence
# (NM, NR, NONCODE, Ensembl). If a set is smaller than --num, take the whole
# set.
grep ">" "${all_fasta}" | cut -d">" -f2 > "${work_dir}/total_name.txt"
./subRNE.py "${work_dir}/total_name.txt" -k peri_spe_code.txt --num 500 \
  > "${work_dir}/simple_total_name.txt"
./subRNE.py "${work_dir}/total_name.txt" -k core_spe_code.txt --num 2000 \
  >> "${work_dir}/simple_total_name.txt"

# Random pick over all species.
# The name list lives in ${work_dir}; the original read it from the current
# directory, where it is never created.
./subRNE.py "${work_dir}/total_name.txt" --num 25 \
  > "${work_dir}/all_spe_name.txt"

# Get prediction data.
# The name lists are read from ${work_dir}, where the steps above wrote them.
./filter_fasta.py "${all_fasta}" "${work_dir}/simple_total_name.txt" \
  > "${work_dir}/simple_total.fasta"
./filter_fasta.py "${all_fasta}" "${work_dir}/all_spe_name.txt" \
  > "${work_dir}/all_spe.fasta"

# Split for prediction.
# Create the directory that split_fasta.py is told to write into (the
# original created ./split_simple instead); -p keeps reruns from failing.
mkdir -p "${split_dir}"
# NOTE(review): "10 --bypart --lth 0" is passed through unchanged; presumably
# 10 parts with no minimum length -- confirm against split_fasta.py.
./split_fasta.py "${work_dir}/simple_total.fasta" "${split_dir}/simple" 10 --bypart --lth 0