% Start from a clean session: wipe the workspace and close every open figure.
clear('all');
close('all');

%% BLAST database locations
% Legacy setting kept for reference (disabled): OligoArrayOutputFile = 'oligos1.txt';
% NOTE(review): RepetitiveBLASTDatabase is not referenced in the visible part
% of this script; only GenomeBLASTDatabase is passed to blastn.
GenomeBLASTDatabase = 'E:\09132023_200Loci\hg19';
RepetitiveBLASTDatabase = 'E:\09132023_200Loci\humanREP';


%% BLAST the repeat-filtered oligos against the genome database

% Load the oligo set produced by the previous filtering stage. The MAT-file is
% expected to provide a variable named GoodOligoRep; fail early with a clear
% message if it does not.
load('Oligos_GoodOligoRep_Long_NewRegions_12052023Updated.mat');
if ~exist('GoodOligoRep','var')
    error('BlastGenome:missingVariable', ...
        'Variable GoodOligoRep was not found in the loaded MAT-file.');
end

% Threshold applied later to column 12 of the BLAST hit table.
Score_Cutoff = 33;

% Keep the input set under its own name so GoodOligoRep can be reassigned to
% the filtered result further down.
OligosInitial = GoodOligoRep;
clear GoodOligoRep;

oligoFile = 'GoodOligos_BLASTRep_Long_NewRegions_12052023Updated.fasta';
out = 'BLAST_output_NewRegions_Genome_LongBlast_12052023Updated.txt';

% Run blastn and check its exit status. Without this check a failed run
% (blastn not on the PATH, missing database, bad query file) would go
% unnoticed and the script would go on to parse a stale or missing output file.
blastCommand = ['blastn -query ' oligoFile ' -db ' GenomeBLASTDatabase ' -out ' out ' -outfmt 10 -max_target_seqs 5 -num_threads 10'];
blastStatus = system(blastCommand);
if blastStatus ~= 0
    error('BlastGenome:blastFailed', ...
        'blastn exited with status %d. Command was: %s', blastStatus, blastCommand);
end

% Go through the BLAST output file and flag every oligo (query ID in column 1)
% that has MORE THAN ONE hit whose column-12 score exceeds Score_Cutoff.
%
% Outputs used further down the script:
%   Blast_File      - table of all BLAST hits (Var1..VarN)
%   OligoList       - unique query IDs, in order of first appearance
%   Bad_Oligo_Index - row vector of indices into OligoList of flagged oligos
blastOutInfo = dir(out);
if isempty(blastOutInfo)
    error('BlastGenome:missingOutput', 'BLAST output file %s was not found.', out);
end

if blastOutInfo(1).bytes == 0
    % blastn produced no hits at all, so there is nothing to flag.
    Blast_File = table();
    OligoList = cell(0,1);
    ic1 = zeros(0,1);
    Bad_Oligo_Index = [];
else
    % -outfmt 10 is comma-separated with no header row. State that explicitly
    % rather than relying on readtable's auto-detection for a .txt file, which
    % could otherwise consume the first hit as variable names.
    Blast_File = readtable(out, 'FileType','text', 'Delimiter',',', 'ReadVariableNames',false);
    if width(Blast_File) < 12
        error('BlastGenome:unexpectedFormat', ...
            'Expected at least 12 columns in %s but found %d.', out, width(Blast_File));
    end

    % ic1 maps every hit row to its oligo's position in OligoList.
    [OligoList,~,ic1] = unique(Blast_File.Var1,'stable');

    % Count, per oligo, the hits scoring above the cutoff in a single pass
    % instead of rescanning the whole table once per oligo.
    hitsAboveCutoff = accumarray(ic1, double(Blast_File.Var12 > Score_Cutoff), [numel(OligoList) 1]);
    Bad_Oligo_Index = find(hitsAboveCutoff > 1).';
end

% Split the input oligos into those flagged by BLAST (BadOligoRep) and the
% rest (GoodOligoRep) by matching BLAST query IDs against the Header field.
BadOligoList = OligoList(Bad_Oligo_Index);
OligosInitial_Table = struct2table(OligosInitial);
BadOligoRep_Index = matches(OligosInitial_Table.Header,BadOligoList);

% A flagged ID that matches no Header exactly would leave its oligo in the
% "good" set without any indication, so report that case explicitly.
% NOTE(review): this can happen if the Header text differs from the ID that
% BLAST reports for the query - confirm how the FASTA headers are formed.
unmatchedIDs = BadOligoList(~ismember(BadOligoList, OligosInitial_Table.Header));
if ~isempty(unmatchedIDs)
    warning('BlastGenome:unmatchedHeaders', ...
        '%d flagged BLAST query ID(s) matched no oligo Header and were NOT removed (first: %s).', ...
        numel(unmatchedIDs), unmatchedIDs{1});
end

BadOligoRep = OligosInitial(BadOligoRep_Index);
% Keep an untouched copy of the input set before removing the flagged entries.
OligosInitial_Copy = OligosInitial;
OligosInitial(BadOligoRep_Index) = [];
GoodOligoRep = OligosInitial;




% Write the filtered oligo sets to disk: FASTA + MAT for the good oligos,
% FASTA only for the flagged ones.
goodFastaFile = 'GoodOligos_BLASTGenome_Long_NewRegions_12052023Updated.fasta';
goodMatFile   = 'Oligos_GoodOligoGenome_Long_NewRegions_12052023Updated.mat';
badFastaFile  = 'BadOligos_BLASTGenome_Long_NewRegions_12052023Updated.fasta';

% Remove any previous FASTA first so old records cannot be mixed with the new
% ones (fastawrite adds to an existing file rather than replacing it).
% exist(...,'file') == 2 restricts the check to actual files; the untyped form
% also reacts to variables, functions and folders with the same name.
if exist(goodFastaFile,'file') == 2
    delete(goodFastaFile);
end
if ~isempty(GoodOligoRep)
    fastawrite(goodFastaFile, GoodOligoRep);
else
    warning('BlastGenome:noGoodOligos', ...
        'No oligos passed the genome BLAST filter; %s was not written.', goodFastaFile);
end
save(goodMatFile,'GoodOligoRep')

if exist(badFastaFile,'file') == 2
    delete(badFastaFile);
end
% Skip the write when nothing was flagged instead of calling fastawrite with
% an empty set.
if ~isempty(BadOligoRep)
    fastawrite(badFastaFile, BadOligoRep);
end