% Reset the MATLAB session state and close every open figure before the run.
clear('all')
close('all')

% BLAST database locations (path prefix of each database, no file extension).
RepetitiveBLASTDatabase = 'E:\09132023_200Loci\humanREP';   % repetitive-sequence database
GenomeBLASTDatabase     = 'E:\09132023_200Loci\hg19';       % genome database
%% Processing OligoArray output

%% BLAST against repetitive sequence database

% Load the candidate oligos. The MAT-file must supply a variable named
% Oligos, which is kept from here on under the name OligosInitial.
load('Oligos_NewRegions_12052023Updated.mat');
if ~exist('Oligos', 'var')
    error('OligoBlast:missingOligos', ...
        'Oligos_NewRegions_12052023Updated.mat did not provide a variable named "Oligos".');
end

% Threshold applied further down to column 12 of the BLAST output: any hit
% scoring above it flags the oligo as bad.
Score_Cutoff = 33;
OligosInitial = Oligos;
clear Oligos;

% Query FASTA and the file blastn writes its comma-separated (-outfmt 10) hits to.
oligoFile = 'Oligos_buildOligoArray_NewRegions_12052023Updated.fasta';
out = 'BLAST_output_NewRegions_Repetitive_LongBlast_12052023Updated.txt';

blastCommand = ['blastn -query ' oligoFile ' -db ' RepetitiveBLASTDatabase ' -out ' out ' -outfmt 10 -max_target_seqs 5 -num_threads 10'];

% Stop immediately if blastn fails (not installed, bad database path, ...);
% otherwise the following steps would read a stale or missing output file.
blastStatus = system(blastCommand);
if blastStatus ~= 0
    error('OligoBlast:blastnFailed', ...
        'blastn exited with status %d; "%s" may be missing or out of date.', ...
        blastStatus, out);
end

% Go through the BLAST output and find the oligos that have at least one hit
% whose column-12 value (the bit score in BLAST's default -outfmt 10 column
% layout) exceeds Score_Cutoff.
Counter = 0;   % not used in this section; kept in case later parts of the script rely on it

blastOutputInfo = dir(out);
if isempty(blastOutputInfo)
    error('OligoBlast:missingBlastOutput', 'BLAST output file "%s" was not found.', out);
end

if blastOutputInfo(1).bytes == 0
    % blastn reported no hits at all, so no oligo can be flagged.
    Blast_File = table();
    OligoList = cell(0,1);
    ia1 = zeros(0,1);
    ic1 = zeros(0,1);
    Bad_Oligo_Index = [];
else
    % -outfmt 10 is headerless CSV: state that explicitly instead of relying
    % on readtable's auto-detection of the delimiter and header row.
    Blast_File = readtable(out, 'FileType', 'text', 'Delimiter', ',', ...
        'ReadVariableNames', false);
    if width(Blast_File) < 12
        error('OligoBlast:unexpectedBlastFormat', ...
            'Expected at least 12 columns in "%s" but found %d.', out, width(Blast_File));
    end

    % OligoList: query IDs in order of first appearance; ic1 maps every hit
    % row back to its position in OligoList.
    [OligoList,ia1,ic1] = unique(Blast_File.Var1,'stable');

    % An oligo is bad as soon as any of its hits is above the cutoff.
    % unique() returns the indices sorted ascending, the same order the
    % original per-oligo loop produced.
    isHighScoringHit = Blast_File.Var12 > Score_Cutoff;
    Bad_Oligo_Index = unique(ic1(isHighScoringHit)).';
end

% Split the loaded oligos into those flagged by the repetitive-sequence BLAST
% (BadOligoRep) and those that passed (GoodOligoRep).
BadOligoList = OligoList(Bad_Oligo_Index);
OligosInitial_Table = struct2table(OligosInitial);

% cellstr gives one cell per oligo even when the Header column comes back as
% a plain char row (single oligo).
OligoHeaders = cellstr(OligosInitial_Table.Header);

if isempty(BadOligoList)
    % Nothing was flagged: keep every oligo.
    BadOligoRep_Index = false(size(OligoHeaders));
else
    BadOligoRep_Index = ismember(OligoHeaders, BadOligoList);

    % A flagged BLAST query ID with no identical Header would otherwise be
    % dropped silently, leaving that oligo in the "good" set.
    % NOTE(review): BLAST reports only the part of a FASTA header before the
    % first whitespace as the query ID - confirm the headers contain no spaces.
    UnmatchedBadOligos = BadOligoList(~ismember(BadOligoList, OligoHeaders));
    if ~isempty(UnmatchedBadOligos)
        warning('OligoBlast:unmatchedQueryIds', ...
            '%d flagged BLAST query ID(s) have no identical Header in OligosInitial (first: "%s"); those oligos were NOT removed.', ...
            numel(UnmatchedBadOligos), UnmatchedBadOligos{1});
    end
end

BadOligoRep = OligosInitial(BadOligoRep_Index);
OligosInitial_Copy = OligosInitial;      % unfiltered copy, taken before the removal below
OligosInitial(BadOligoRep_Index) = [];
GoodOligoRep = OligosInitial;



% Write the good and bad oligo sets to FASTA files and save the good set as a
% MAT-file. Any existing FASTA file is deleted first so the new file holds
% only this run's sequences.
GoodOligoFastaFile = 'GoodOligos_BLASTRep_Long_NewRegions_12052023Updated.fasta';
BadOligoFastaFile  = 'BadOligos_BLASTRep_Long_NewRegions_12052023Updated.fasta';

% isfile checks exactly the path that delete/fastawrite will act on; the
% single-argument exist() also matches folders and files elsewhere on the
% MATLAB search path.
if isfile(GoodOligoFastaFile)
    delete(GoodOligoFastaFile);
end
fastawrite(GoodOligoFastaFile, GoodOligoRep);
save('Oligos_GoodOligoRep_Long_NewRegions_12052023Updated.mat','GoodOligoRep')

if isfile(BadOligoFastaFile)
    delete(BadOligoFastaFile);
end
fastawrite(BadOligoFastaFile, BadOligoRep);


