% This file, DNAdesign/notes-examples, is provided
% to help familiarize you with this extremely preliminary
% release of my DNA sequence design software.
%
% We will step by step construct sequences for the DAO A&B tiles
% of the type used in my Nature paper.
%
% The DAO in Winfree,Liu,Wenzler,Seeman were designed by a previous
% version of this software.  The DAE were designed by Seeman's SEQUIN.
% However, this file shows how similar sequences could be designed
% using this software.
%
% Most recently tested with MATLAB 6.5.1  (6/18/04)
%
%   DNAdesign package (Winfree lab, Caltech)

% For folks in my lab and other users of the Caltech DNA Group cluster:
%
% In MATLAB, execute "addpath /research/src/DNAdesign", as below, where
% the MATLAB files aleady exist.  Also, the relevant files are already 
% compiled there, so you don't need to do that. 

% For other folks who 
% All the files in DNAdesign should be unpacked into a directory
% of that name, possibly as a subdirectory of your working DNA design 
% directory.  Matlab will need to know where all the *.m files are,
% so that directory must be added into the path.  Here, I use the
% DNA cluster shared directory; I've commented out using my own local
% copy.

addpath /research/src/DNAdesign

% We should compile the speed-critical functions.
% Since the .mex files are put in the current working directory,
% we cd to where we want them to stay, i.e. the DNAdesign directory.
% Comments here are w/r to the MATLAB 6.5.1 R13 compiler.
% (NOTE: if your computers don't have the MATLAB compiler, 
% skip the mcc commands.  Everything will run 10x to 100x slower....)

%% NOTE: MATLAB dies a horrible death if it tries to run a mexglx file
%% from a directory found via an indirect path (e.g. '../DNAdesign').
%% Consequently, it is VERY IMPORTANT to use a full path name, above.
%% Here, I compile in the DNA cluster /research directory.  You shouldn't have 
%% to do this if you use the DNA cluster; it is already compiled for you.
%% [This comment may only apply to MATLAB v 6.0, compiler v 2.1. 
%% Untested since then.]

if 0 % This only needs to be done once.  It's already done on the DNA cluster.
workpath=pwd;
cd /research/src/DNAdesign

mcc -x -h spurious
mcc -x -h spurious1
mcc -x -h spuriousW
mcc -x -h dotplot
mcc -x -h constrain 
mcc -x -h randbase
mcc -x -h helix_dG
mcc -x -h stickyends
mcc -x -h verboten
mcc -x -h make_bad6mer 
mcc -x -h score_spurious
mcc -x -h score_eg
mcc -x -h score_TX
%% note: for debugging, it's best not to compile constraints.m

bad6mer=make_bad6mer;
save Bad6mer bad6mer

cd(workpath) 
end

% To understand stuff below, it helps to use help!  
% All the functions in this package have help information describing
% their data structures and what they do.  E.g. 'help constraints'

% The four strands for each complex:
% Sequence restrictions for each strand -- we hard code the junctions.
DAOseqA = ['NNNNNNNNNNNCACCNNNNNNNNNNN ' ...
           'NNNNNNGGACNNNNNNNNNNNNCACCNNNNNNNNNNNNCCTGNNNNNN ' ...
           'NNNNNNCCTGNNNNNNNNNNNNGTGGNNNNNNNNNNNNGGACNNNNNN ' ...
           'NNNNNNNNNNNGTGGNNNNNNNNNNN'];
DAOseqB = ['NNNNNNNNNNNGGTGNNNNNNNNNNN ' ...
           'NNNNNNCAGGNNNNNNNNNNNNGGTGNNNNNNNNNNNNGTCCNNNNNN ' ...
           'NNNNNNGTCCNNNNNNNNNNNNCCACNNNNNNNNNNNNCAGGNNNNNN ' ...
           'NNNNNNNNNNNCCACNNNNNNNNNNN'];

% Define which ds helices exist.  (see 'help constraints')
DAOhelixA = [1 6 2 48 8; 3 25 2 40 16; 3 41 4 13 8; ...
             2 1 1 21 8; 2 9 3 24 16; 4 14 3 8 8];
DAOhelixB = DAOhelixA;

% Put it together into one spec with 8 strands and requirements for
% the sticky-end complementarity.
% Note the '  ' to separete the first DX from the second.
% Note that the strands in B get renumbered when we put all 8 together.
DAOseq = [DAOseqA '  ' DAOseqB ];
DAOhelixB = ones(6,1)*[4 0 4 0 0] + DAOhelixB ;
DAOhelices = [ DAOhelixA; DAOhelixB ];
DAOsticky = [ 1 1 8 5 5; 1 22 8 26 5; 4 1 5 5 5; 4 22 5 26 5 ];

% Build the required sequence template (St), Watson-Crick base pairing (wc)
% and base equality (eq) data structures.
[St, wc, eq] = constraints(DAOseq, [DAOhelices; DAOsticky], []);
displayDX(St, DAOhelixA, [ 1 2 3; 4 5 6 ] )
displayDX(St, DAOhelixB, [ 1 2 3; 4 5 6 ] )

% Now play around with the data structures a bit, just for fun.
% Assign random bases according to the template.  Note lack of complementarity.
rS=randbase(St);
displayDX(rS, DAOhelixA, [ 1 2 3; 4 5 6 ] )

% Enforce the base-pairing and equality rules
S=constrain(rS,wc,eq);
displayDX(S, DAOhelixA, [ 1 2 3; 4 5 6 ] )
displayDX(S, DAOhelixB, [ 1 2 3; 4 5 6 ] )

% save the data structures, so we don't have to go through this again.
save DAOexample S St wc eq DAOhelixA DAOhelixB DAOhelices DAOseqA DAOseqB DAOseq DAOsticky

% How good is the random sequence S?  Diagonals show potential WC domains.
% Mwc shows the actual sequence, Mbp shows only the required base-pairings.
[Mwc,Mbp] = dotplot(S,wc,eq,Inf);
figure(1); imagesc(1-exp(-Mwc/16)); figure(2); imagesc(1-exp(-Mbp/16));

% The help notes tell you what this means.  How many places can
% a 6-mer find a complementary subsequence that was NOT intended?
% (i.e. how many unintended 6-long diagonals are there?)
% (here, we do 3-mers to 8-mers, and consider intramolecular, intermolecular,
% and intercomplex separately)
spurious(S, 3,8, wc,eq)

% As before, but allowing internal 1 mismatch (not counting perfect matches)
spurious1(S, 3,8)

% spuriousW doesn't give you the full breakdown, but it allows you
% to make some parts of the complex more important that others
% (e.g. junctions) ... however we don't do that here, to compare with above.
sum(spurious(S, 3,8, wc,eq)')
spuriousW(S, 3,8, wc, eq, ones(1,length(S)), [1 0 0]/2)
spuriousW(S, 3,8, wc, eq, ones(1,length(S)), [0 1 0]/2)
spuriousW(S, 3,8, wc, eq, ones(1,length(S)), [0 0 1]/2)

% A typical scoring system.  
weights = 4*(St=='N') + 12*(St~='N');  % junctions cost more!
prefactors = [2^(-8) 2^(-9) 2^(-12)];  % roughly same as Nature tiles
spuriousW(S, 3,8, wc, eq, weights, prefactors)

% What do these scores mean?  They don't have a rigorous connection
% to the probability of getting the correct DNA fold, unfortunately.
% So one has to use intuition to determine the relative weighting
% of various factors to set up in your scoring function.
% 'score_eg.m' shows one set of choices, reasonable for the DAO tile
% design.  Make a new 'score_mine.m' and modify it for your 
% design task.

% Now we'll do the stochastic optimization.
% several variable need to be made global, so functions can read them
clear
global bestS DAOsticky bad6mer
load Bad6mer
load DAOexample
optimize(S,St,wc,eq, 'score_eg')
% hit control-C to stop it when you're bored... e.g. a few hours.

% How well did it do?
spurious(S, 3,8, wc,eq)
spurious(bestS, 3,8, wc,eq)

spurious1(S, 3,8)
spurious1(bestS, 3,8)

displayDX(bestS, DAOhelixA, [ 1 2 3; 4 5 6 ] )
displayDX(bestS, DAOhelixB, [ 1 2 3; 4 5 6 ] )

figure(1); 
[Mwc,Mbp] = dotplot(S,wc,eq,Inf);
subplot(2,2,1); imagesc(log(1+Mwc)); 
subplot(2,2,2); imagesc(log(1+Mbp));
[Mwc,Mbp] = dotplot(bestS,wc,eq,Inf);
subplot(2,2,3); imagesc(log(1+Mwc)); 
subplot(2,2,4); imagesc(log(1+Mbp));
%% The lattice-like scattering of matches in Mbp are due to the fixed
%% (non-variable) sequences defined at the crossovers, e.g. GGAC.

stickyends(S,DAOsticky)
stickyends(bestS,DAOsticky)

% to continute optimizing,
optimize(bestS,St,wc,eq, 'score_eg')

% Have Fun!  ( Can you make a DAE? )