<?xml version="1.0" encoding="ascii"?> <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "DTD/xhtml1-transitional.dtd"> <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> <head> <title>Bio.MaxEntropy</title> <link rel="stylesheet" href="epydoc.css" type="text/css" /> <script type="text/javascript" src="epydoc.js"></script> </head> <body bgcolor="white" text="black" link="blue" vlink="#204080" alink="#204080"> <!-- ==================== NAVIGATION BAR ==================== --> <table class="navbar" border="0" width="100%" cellpadding="0" bgcolor="#a0c0ff" cellspacing="0"> <tr valign="middle"> <!-- Tree link --> <th> <a href="module-tree.html">Trees</a> </th> <!-- Index link --> <th> <a href="identifier-index.html">Indices</a> </th> <!-- Help link --> <th> <a href="help.html">Help</a> </th> <th class="navbar" width="100%"></th> </tr> </table> <table width="100%" cellpadding="0" cellspacing="0"> <tr valign="top"> <td width="100%"> <span class="breadcrumbs"> <a href="Bio-module.html">Package Bio</a> :: Module MaxEntropy </span> </td> <td> <table cellpadding="0" cellspacing="0"> <!-- hide/show private --> <tr><td align="right"><span class="options">[<a href="javascript:void(0);" class="privatelink" onclick="toggle_private();">hide private</a>]</span></td></tr> <tr><td align="right"><span class="options" >[<a href="frames.html" target="_top">frames</a > | <a href="Bio.MaxEntropy-pysrc.html" target="_top">no frames</a>]</span></td></tr> </table> </td> </tr> </table> <h1 class="epydoc">Source Code for <a href="Bio.MaxEntropy-module.html">Module Bio.MaxEntropy</a></h1> <pre class="py-src"> <a name="L1"></a><tt class="py-lineno"> 1</tt> <tt class="py-line"><tt class="py-comment"># Copyright 2001 by Jeffrey Chang. 
All rights reserved.</tt> </tt> <a name="L2"></a><tt class="py-lineno"> 2</tt> <tt class="py-line"><tt class="py-comment"></tt><tt class="py-comment"># This code is part of the Biopython distribution and governed by its</tt> </tt> <a name="L3"></a><tt class="py-lineno"> 3</tt> <tt class="py-line"><tt class="py-comment"></tt><tt class="py-comment"># license. Please see the LICENSE file that should have been included</tt> </tt> <a name="L4"></a><tt class="py-lineno"> 4</tt> <tt class="py-line"><tt class="py-comment"></tt><tt class="py-comment"># as part of this package.</tt> </tt> <a name="L5"></a><tt class="py-lineno"> 5</tt> <tt class="py-line"><tt class="py-comment"></tt> </tt> <a name="L6"></a><tt class="py-lineno"> 6</tt> <tt class="py-line"><tt class="py-docstring">"""</tt> </tt> <a name="L7"></a><tt class="py-lineno"> 7</tt> <tt class="py-line"><tt class="py-docstring">Maximum Entropy code.</tt> </tt> <a name="L8"></a><tt class="py-lineno"> 8</tt> <tt class="py-line"><tt class="py-docstring"></tt> </tt> <a name="L9"></a><tt class="py-lineno"> 9</tt> <tt class="py-line"><tt class="py-docstring">Uses Improved Iterative Scaling:</tt> </tt> <a name="L10"></a><tt class="py-lineno"> 10</tt> <tt class="py-line"><tt class="py-docstring">XXX ref</tt> </tt> <a name="L11"></a><tt class="py-lineno"> 11</tt> <tt class="py-line"><tt class="py-docstring"></tt> </tt> <a name="L12"></a><tt class="py-lineno"> 12</tt> <tt class="py-line"><tt class="py-docstring"># XXX need to define terminology</tt> </tt> <a name="L13"></a><tt class="py-lineno"> 13</tt> <tt class="py-line"><tt class="py-docstring"></tt> </tt> <a name="L14"></a><tt class="py-lineno"> 14</tt> <tt class="py-line"><tt class="py-docstring">"""</tt> </tt> <a name="L15"></a><tt class="py-lineno"> 15</tt> <tt class="py-line"><tt class="py-keyword">import</tt> <tt class="py-name">math</tt> </tt> <a name="L16"></a><tt class="py-lineno"> 16</tt> <tt class="py-line"><tt class="py-keyword">from</tt> <tt 
class="py-name">Numeric</tt> <tt class="py-keyword">import</tt> <tt class="py-op">*</tt> </tt> <a name="L17"></a><tt class="py-lineno"> 17</tt> <tt class="py-line"><tt class="py-keyword">from</tt> <tt id="link-0" class="py-name" targets="Package Bio=Bio-module.html"><a title="Bio" class="py-name" href="#" onclick="return doclink('link-0', 'Bio', 'link-0');">Bio</a></tt> <tt class="py-keyword">import</tt> <tt id="link-1" class="py-name" targets="Module Bio.listfns=Bio.listfns-module.html"><a title="Bio.listfns" class="py-name" href="#" onclick="return doclink('link-1', 'listfns', 'link-1');">listfns</a></tt> </tt> <a name="L18"></a><tt class="py-lineno"> 18</tt> <tt class="py-line"> </tt> <a name="L19"></a><tt class="py-lineno"> 19</tt> <tt class="py-line"><tt class="py-comment"># XXX typecodes for Numeric</tt> </tt> <a name="L20"></a><tt class="py-lineno"> 20</tt> <tt class="py-line"><tt class="py-comment"></tt><tt class="py-comment"># XXX multiprocessor</tt> </tt> <a name="L21"></a><tt class="py-lineno"> 21</tt> <tt class="py-line"><tt class="py-comment"></tt> </tt> <a name="L22"></a><tt class="py-lineno"> 22</tt> <tt class="py-line"><tt id="link-2" class="py-name" targets="Variable Bio.MaxEntropy.MAX_IIS_ITERATIONS=Bio.MaxEntropy-module.html#MAX_IIS_ITERATIONS"><a title="Bio.MaxEntropy.MAX_IIS_ITERATIONS" class="py-name" href="#" onclick="return doclink('link-2', 'MAX_IIS_ITERATIONS', 'link-2');">MAX_IIS_ITERATIONS</a></tt> <tt class="py-op">=</tt> <tt class="py-number">10000</tt> <tt class="py-comment"># Maximum iterations for IIS.</tt> </tt> <a name="L23"></a><tt class="py-lineno"> 23</tt> <tt class="py-line"><tt id="link-3" class="py-name" targets="Variable Bio.MaxEntropy.IIS_CONVERGE=Bio.MaxEntropy-module.html#IIS_CONVERGE"><a title="Bio.MaxEntropy.IIS_CONVERGE" class="py-name" href="#" onclick="return doclink('link-3', 'IIS_CONVERGE', 'link-3');">IIS_CONVERGE</a></tt> <tt class="py-op">=</tt> <tt class="py-number">1E-5</tt> <tt class="py-comment"># 
Convergence criteria for IIS.</tt> </tt> <a name="L24"></a><tt class="py-lineno"> 24</tt> <tt class="py-line"><tt id="link-4" class="py-name" targets="Variable Bio.MaxEntropy.MAX_NEWTON_ITERATIONS=Bio.MaxEntropy-module.html#MAX_NEWTON_ITERATIONS"><a title="Bio.MaxEntropy.MAX_NEWTON_ITERATIONS" class="py-name" href="#" onclick="return doclink('link-4', 'MAX_NEWTON_ITERATIONS', 'link-4');">MAX_NEWTON_ITERATIONS</a></tt> <tt class="py-op">=</tt> <tt class="py-number">100</tt> <tt class="py-comment"># Maximum iterations on Newton's method.</tt> </tt> <a name="L25"></a><tt class="py-lineno"> 25</tt> <tt class="py-line"><tt id="link-5" class="py-name" targets="Variable Bio.MaxEntropy.NEWTON_CONVERGE=Bio.MaxEntropy-module.html#NEWTON_CONVERGE"><a title="Bio.MaxEntropy.NEWTON_CONVERGE" class="py-name" href="#" onclick="return doclink('link-5', 'NEWTON_CONVERGE', 'link-5');">NEWTON_CONVERGE</a></tt> <tt class="py-op">=</tt> <tt class="py-number">1E-10</tt> <tt class="py-comment"># Convergence criteria for Newton's method.</tt> </tt> <a name="L26"></a><tt class="py-lineno"> 26</tt> <tt class="py-line"> </tt> <a name="MaxEntropy"></a><div id="MaxEntropy-def"><a name="L27"></a><tt class="py-lineno"> 27</tt> <a class="py-toggle" href="#" id="MaxEntropy-toggle" onclick="return toggle('MaxEntropy');">-</a><tt class="py-line"><tt class="py-keyword">class</tt> <a class="py-def-name" href="Bio.MaxEntropy.MaxEntropy-class.html">MaxEntropy</a><tt class="py-op">:</tt> </tt> </div><div id="MaxEntropy-collapsed" style="display:none;" pad="+++" indent="++++"></div><div id="MaxEntropy-expanded"><a name="L28"></a><tt class="py-lineno"> 28</tt> <tt class="py-line"> <tt class="py-docstring">"""Holds information for a Maximum Entropy classifier.</tt> </tt> <a name="L29"></a><tt class="py-lineno"> 29</tt> <tt class="py-line"><tt class="py-docstring"></tt> </tt> <a name="L30"></a><tt class="py-lineno"> 30</tt> <tt class="py-line"><tt class="py-docstring"> Members:</tt> </tt> <a 
name="L31"></a><tt class="py-lineno"> 31</tt> <tt class="py-line"><tt class="py-docstring"> classes List of the possible classes of data.</tt> </tt> <a name="L32"></a><tt class="py-lineno"> 32</tt> <tt class="py-line"><tt class="py-docstring"> alphas List of the weights for each feature.</tt> </tt> <a name="L33"></a><tt class="py-lineno"> 33</tt> <tt class="py-line"><tt class="py-docstring"> feature_fns List of the feature functions.</tt> </tt> <a name="L34"></a><tt class="py-lineno"> 34</tt> <tt class="py-line"><tt class="py-docstring"></tt> </tt> <a name="L35"></a><tt class="py-lineno"> 35</tt> <tt class="py-line"><tt class="py-docstring"> """</tt> </tt> <a name="MaxEntropy.__init__"></a><div id="MaxEntropy.__init__-def"><a name="L36"></a><tt class="py-lineno"> 36</tt> <a class="py-toggle" href="#" id="MaxEntropy.__init__-toggle" onclick="return toggle('MaxEntropy.__init__');">-</a><tt class="py-line"> <tt class="py-keyword">def</tt> <a class="py-def-name" href="Bio.MaxEntropy.MaxEntropy-class.html#__init__">__init__</a><tt class="py-op">(</tt><tt class="py-param">self</tt><tt class="py-op">)</tt><tt class="py-op">:</tt> </tt> </div><div id="MaxEntropy.__init__-collapsed" style="display:none;" pad="+++" indent="++++++++"></div><div id="MaxEntropy.__init__-expanded"><a name="L37"></a><tt class="py-lineno"> 37</tt> <tt class="py-line"> <tt class="py-name">self</tt><tt class="py-op">.</tt><tt class="py-name">classes</tt> <tt class="py-op">=</tt> <tt class="py-op">[</tt><tt class="py-op">]</tt> </tt> <a name="L38"></a><tt class="py-lineno"> 38</tt> <tt class="py-line"> <tt class="py-name">self</tt><tt class="py-op">.</tt><tt class="py-name">alphas</tt> <tt class="py-op">=</tt> <tt class="py-op">[</tt><tt class="py-op">]</tt> </tt> <a name="L39"></a><tt class="py-lineno"> 39</tt> <tt class="py-line"> <tt class="py-name">self</tt><tt class="py-op">.</tt><tt class="py-name">feature_fns</tt> <tt class="py-op">=</tt> <tt class="py-op">[</tt><tt class="py-op">]</tt> </tt> 
</div></div><a name="L40"></a><tt class="py-lineno"> 40</tt> <tt class="py-line"> </tt> <a name="calculate"></a><div id="calculate-def"><a name="L41"></a><tt class="py-lineno"> 41</tt> <a class="py-toggle" href="#" id="calculate-toggle" onclick="return toggle('calculate');">-</a><tt class="py-line"><tt class="py-keyword">def</tt> <a class="py-def-name" href="Bio.MaxEntropy-module.html#calculate">calculate</a><tt class="py-op">(</tt><tt class="py-param">me</tt><tt class="py-op">,</tt> <tt class="py-param">observation</tt><tt class="py-op">)</tt><tt class="py-op">:</tt> </tt> </div><div id="calculate-collapsed" style="display:none;" pad="+++" indent="++++"></div><div id="calculate-expanded"><a name="L42"></a><tt class="py-lineno"> 42</tt> <tt class="py-line"> <tt class="py-docstring">"""calculate(me, observation) -> list of log probs</tt> </tt> <a name="L43"></a><tt class="py-lineno"> 43</tt> <tt class="py-line"><tt class="py-docstring"></tt> </tt> <a name="L44"></a><tt class="py-lineno"> 44</tt> <tt class="py-line"><tt class="py-docstring"> Calculate the log of the probability for each class. me is a</tt> </tt> <a name="L45"></a><tt class="py-lineno"> 45</tt> <tt class="py-line"><tt class="py-docstring"> MaxEntropy object that has been trained. observation is a vector</tt> </tt> <a name="L46"></a><tt class="py-lineno"> 46</tt> <tt class="py-line"><tt class="py-docstring"> representing the observed data. 
The return value is a list of</tt> </tt> <a name="L47"></a><tt class="py-lineno"> 47</tt> <tt class="py-line"><tt class="py-docstring"> unnormalized log probabilities for each class.</tt> </tt> <a name="L48"></a><tt class="py-lineno"> 48</tt> <tt class="py-line"><tt class="py-docstring"></tt> </tt> <a name="L49"></a><tt class="py-lineno"> 49</tt> <tt class="py-line"><tt class="py-docstring"> """</tt> </tt> <a name="L50"></a><tt class="py-lineno"> 50</tt> <tt class="py-line"> <tt id="link-6" class="py-name" targets="Method Bio.Compass._Consumer.scores()=Bio.Compass._Consumer-class.html#scores"><a title="Bio.Compass._Consumer.scores" class="py-name" href="#" onclick="return doclink('link-6', 'scores', 'link-6');">scores</a></tt> <tt class="py-op">=</tt> <tt class="py-op">[</tt><tt class="py-op">]</tt> </tt> <a name="L51"></a><tt class="py-lineno"> 51</tt> <tt class="py-line"> <tt class="py-keyword">for</tt> <tt class="py-name">klass</tt> <tt class="py-keyword">in</tt> <tt class="py-name">range</tt><tt class="py-op">(</tt><tt class="py-name">len</tt><tt class="py-op">(</tt><tt class="py-name">me</tt><tt class="py-op">.</tt><tt class="py-name">classes</tt><tt class="py-op">)</tt><tt class="py-op">)</tt><tt class="py-op">:</tt> </tt> <a name="L52"></a><tt class="py-lineno"> 52</tt> <tt class="py-line"> <tt class="py-name">lprob</tt> <tt class="py-op">=</tt> <tt class="py-number">0.0</tt> </tt> <a name="L53"></a><tt class="py-lineno"> 53</tt> <tt class="py-line"> <tt class="py-keyword">for</tt> <tt class="py-name">fn</tt><tt class="py-op">,</tt> <tt class="py-name">alpha</tt> <tt class="py-keyword">in</tt> <tt id="link-7" class="py-name" targets="Method Bio.GFF.FeatureAggregate.map()=Bio.GFF.FeatureAggregate-class.html#map"><a title="Bio.GFF.FeatureAggregate.map" class="py-name" href="#" onclick="return doclink('link-7', 'map', 'link-7');">map</a></tt><tt class="py-op">(</tt><tt class="py-name">None</tt><tt class="py-op">,</tt> <tt class="py-name">me</tt><tt 
class="py-op">.</tt><tt class="py-name">feature_fns</tt><tt class="py-op">,</tt> <tt class="py-name">me</tt><tt class="py-op">.</tt><tt class="py-name">alphas</tt><tt class="py-op">)</tt><tt class="py-op">:</tt> </tt> <a name="L54"></a><tt class="py-lineno"> 54</tt> <tt class="py-line"> <tt class="py-name">lprob</tt> <tt class="py-op">+=</tt> <tt class="py-name">fn</tt><tt class="py-op">(</tt><tt class="py-name">observation</tt><tt class="py-op">,</tt> <tt class="py-name">klass</tt><tt class="py-op">)</tt> <tt class="py-op">*</tt> <tt class="py-name">alpha</tt> </tt> <a name="L55"></a><tt class="py-lineno"> 55</tt> <tt class="py-line"> <tt id="link-8" class="py-name"><a title="Bio.Compass._Consumer.scores" class="py-name" href="#" onclick="return doclink('link-8', 'scores', 'link-6');">scores</a></tt><tt class="py-op">.</tt><tt id="link-9" class="py-name" targets="Method Bio.Crystal.Chain.append()=Bio.Crystal.Chain-class.html#append,Method Bio.EUtils.POM.ElementNode.append()=Bio.EUtils.POM.ElementNode-class.html#append,Method Bio.EUtils.sourcegen.SourceFile.append()=Bio.EUtils.sourcegen.SourceFile-class.html#append,Method Bio.SCOP.Raf.SeqMap.append()=Bio.SCOP.Raf.SeqMap-class.html#append,Method Bio.Seq.MutableSeq.append()=Bio.Seq.MutableSeq-class.html#append,Method Bio.Wise.psw.Alignment.append()=Bio.Wise.psw.Alignment-class.html#append,Method Bio.Wise.psw.AlignmentColumn.append()=Bio.Wise.psw.AlignmentColumn-class.html#append,Method Martel.msre_parse.SubPattern.append()=Martel.msre_parse.SubPattern-class.html#append"><a title="Bio.Crystal.Chain.append Bio.EUtils.POM.ElementNode.append Bio.EUtils.sourcegen.SourceFile.append Bio.SCOP.Raf.SeqMap.append Bio.Seq.MutableSeq.append Bio.Wise.psw.Alignment.append Bio.Wise.psw.AlignmentColumn.append Martel.msre_parse.SubPattern.append" class="py-name" href="#" onclick="return doclink('link-9', 'append', 'link-9');">append</a></tt><tt class="py-op">(</tt><tt class="py-name">lprob</tt><tt class="py-op">)</tt> </tt> <a 
name="L56"></a><tt class="py-lineno"> 56</tt> <tt class="py-line"> <tt class="py-keyword">return</tt> <tt id="link-10" class="py-name"><a title="Bio.Compass._Consumer.scores" class="py-name" href="#" onclick="return doclink('link-10', 'scores', 'link-6');">scores</a></tt> </tt> </div><a name="L57"></a><tt class="py-lineno"> 57</tt> <tt class="py-line"> </tt> <a name="classify"></a><div id="classify-def"><a name="L58"></a><tt class="py-lineno"> 58</tt> <a class="py-toggle" href="#" id="classify-toggle" onclick="return toggle('classify');">-</a><tt class="py-line"><tt class="py-keyword">def</tt> <a class="py-def-name" href="Bio.MaxEntropy-module.html#classify">classify</a><tt class="py-op">(</tt><tt class="py-param">me</tt><tt class="py-op">,</tt> <tt class="py-param">observation</tt><tt class="py-op">)</tt><tt class="py-op">:</tt> </tt> </div><div id="classify-collapsed" style="display:none;" pad="+++" indent="++++"></div><div id="classify-expanded"><a name="L59"></a><tt class="py-lineno"> 59</tt> <tt class="py-line"> <tt class="py-docstring">"""classify(me, observation) -> class</tt> </tt> <a name="L60"></a><tt class="py-lineno"> 60</tt> <tt class="py-line"><tt class="py-docstring"></tt> </tt> <a name="L61"></a><tt class="py-lineno"> 61</tt> <tt class="py-line"><tt class="py-docstring"> Classify an observation into a class.</tt> </tt> <a name="L62"></a><tt class="py-lineno"> 62</tt> <tt class="py-line"><tt class="py-docstring"></tt> </tt> <a name="L63"></a><tt class="py-lineno"> 63</tt> <tt class="py-line"><tt class="py-docstring"> """</tt> </tt> <a name="L64"></a><tt class="py-lineno"> 64</tt> <tt class="py-line"> <tt id="link-11" class="py-name"><a title="Bio.Compass._Consumer.scores" class="py-name" href="#" onclick="return doclink('link-11', 'scores', 'link-6');">scores</a></tt> <tt class="py-op">=</tt> <tt id="link-12" class="py-name" targets="Function Bio.LogisticRegression.calculate()=Bio.LogisticRegression-module.html#calculate,Function 
Bio.MaxEntropy.calculate()=Bio.MaxEntropy-module.html#calculate,Function Bio.NaiveBayes.calculate()=Bio.NaiveBayes-module.html#calculate,Function Bio.kNN.calculate()=Bio.kNN-module.html#calculate"><a title="Bio.LogisticRegression.calculate Bio.MaxEntropy.calculate Bio.NaiveBayes.calculate Bio.kNN.calculate" class="py-name" href="#" onclick="return doclink('link-12', 'calculate', 'link-12');">calculate</a></tt><tt class="py-op">(</tt><tt class="py-name">me</tt><tt class="py-op">,</tt> <tt class="py-name">observation</tt><tt class="py-op">)</tt> </tt> <a name="L65"></a><tt class="py-lineno"> 65</tt> <tt class="py-line"> <tt class="py-name">max_score</tt><tt class="py-op">,</tt> <tt class="py-name">klass</tt> <tt class="py-op">=</tt> <tt id="link-13" class="py-name"><a title="Bio.Compass._Consumer.scores" class="py-name" href="#" onclick="return doclink('link-13', 'scores', 'link-6');">scores</a></tt><tt class="py-op">[</tt><tt class="py-number">0</tt><tt class="py-op">]</tt><tt class="py-op">,</tt> <tt class="py-name">me</tt><tt class="py-op">.</tt><tt class="py-name">classes</tt><tt class="py-op">[</tt><tt class="py-number">0</tt><tt class="py-op">]</tt> </tt> <a name="L66"></a><tt class="py-lineno"> 66</tt> <tt class="py-line"> <tt class="py-keyword">for</tt> <tt id="link-14" class="py-name" targets="Variable Bio.PDB.Polypeptide.i=Bio.PDB.Polypeptide-module.html#i"><a title="Bio.PDB.Polypeptide.i" class="py-name" href="#" onclick="return doclink('link-14', 'i', 'link-14');">i</a></tt> <tt class="py-keyword">in</tt> <tt class="py-name">range</tt><tt class="py-op">(</tt><tt class="py-number">1</tt><tt class="py-op">,</tt> <tt class="py-name">len</tt><tt class="py-op">(</tt><tt id="link-15" class="py-name"><a title="Bio.Compass._Consumer.scores" class="py-name" href="#" onclick="return doclink('link-15', 'scores', 'link-6');">scores</a></tt><tt class="py-op">)</tt><tt class="py-op">)</tt><tt class="py-op">:</tt> </tt> <a name="L67"></a><tt class="py-lineno"> 67</tt> 
<tt class="py-line"> <tt class="py-keyword">if</tt> <tt id="link-16" class="py-name"><a title="Bio.Compass._Consumer.scores" class="py-name" href="#" onclick="return doclink('link-16', 'scores', 'link-6');">scores</a></tt><tt class="py-op">[</tt><tt id="link-17" class="py-name"><a title="Bio.PDB.Polypeptide.i" class="py-name" href="#" onclick="return doclink('link-17', 'i', 'link-14');">i</a></tt><tt class="py-op">]</tt> <tt class="py-op">></tt> <tt class="py-name">max_score</tt><tt class="py-op">:</tt> </tt> <a name="L68"></a><tt class="py-lineno"> 68</tt> <tt class="py-line"> <tt class="py-name">max_score</tt><tt class="py-op">,</tt> <tt class="py-name">klass</tt> <tt class="py-op">=</tt> <tt id="link-18" class="py-name"><a title="Bio.Compass._Consumer.scores" class="py-name" href="#" onclick="return doclink('link-18', 'scores', 'link-6');">scores</a></tt><tt class="py-op">[</tt><tt id="link-19" class="py-name"><a title="Bio.PDB.Polypeptide.i" class="py-name" href="#" onclick="return doclink('link-19', 'i', 'link-14');">i</a></tt><tt class="py-op">]</tt><tt class="py-op">,</tt> <tt class="py-name">me</tt><tt class="py-op">.</tt><tt class="py-name">classes</tt><tt class="py-op">[</tt><tt id="link-20" class="py-name"><a title="Bio.PDB.Polypeptide.i" class="py-name" href="#" onclick="return doclink('link-20', 'i', 'link-14');">i</a></tt><tt class="py-op">]</tt> </tt> <a name="L69"></a><tt class="py-lineno"> 69</tt> <tt class="py-line"> <tt class="py-keyword">return</tt> <tt class="py-name">klass</tt> </tt> </div><a name="L70"></a><tt class="py-lineno"> 70</tt> <tt class="py-line"> </tt> <a name="_eval_feature_fn"></a><div id="_eval_feature_fn-def"><a name="L71"></a><tt class="py-lineno"> 71</tt> <a class="py-toggle" href="#" id="_eval_feature_fn-toggle" onclick="return toggle('_eval_feature_fn');">-</a><tt class="py-line"><tt class="py-keyword">def</tt> <a class="py-def-name" href="Bio.MaxEntropy-module.html#_eval_feature_fn">_eval_feature_fn</a><tt 
class="py-op">(</tt><tt class="py-param">fn</tt><tt class="py-op">,</tt> <tt class="py-param">xs</tt><tt class="py-op">,</tt> <tt class="py-param">classes</tt><tt class="py-op">)</tt><tt class="py-op">:</tt> </tt> </div><div id="_eval_feature_fn-collapsed" style="display:none;" pad="+++" indent="++++"></div><div id="_eval_feature_fn-expanded"><a name="L72"></a><tt class="py-lineno"> 72</tt> <tt class="py-line"> <tt class="py-docstring">"""_eval_feature_fn(fn, xs, classes) -> dict of values</tt> </tt> <a name="L73"></a><tt class="py-lineno"> 73</tt> <tt class="py-line"><tt class="py-docstring"></tt> </tt> <a name="L74"></a><tt class="py-lineno"> 74</tt> <tt class="py-line"><tt class="py-docstring"> Evaluate a feature function on every instance of the training set</tt> </tt> <a name="L75"></a><tt class="py-lineno"> 75</tt> <tt class="py-line"><tt class="py-docstring"> and class. fn is a callback function that takes two parameters: a</tt> </tt> <a name="L76"></a><tt class="py-lineno"> 76</tt> <tt class="py-line"><tt class="py-docstring"> training instance and a class. Return a dictionary of (training</tt> </tt> <a name="L77"></a><tt class="py-lineno"> 77</tt> <tt class="py-line"><tt class="py-docstring"> set index, class index) -> non-zero value. 
Values of 0 are not</tt> </tt> <a name="L78"></a><tt class="py-lineno"> 78</tt> <tt class="py-line"><tt class="py-docstring"> stored in the dictionary.</tt> </tt> <a name="L79"></a><tt class="py-lineno"> 79</tt> <tt class="py-line"><tt class="py-docstring"></tt> </tt> <a name="L80"></a><tt class="py-lineno"> 80</tt> <tt class="py-line"><tt class="py-docstring"> """</tt> </tt> <a name="L81"></a><tt class="py-lineno"> 81</tt> <tt class="py-line"> <tt id="link-21" class="py-name" targets="Method Bio.Crystal.Crystal.values()=Bio.Crystal.Crystal-class.html#values,Method Bio.EUtils.MultiDict._BaseMultiDict.values()=Bio.EUtils.MultiDict._BaseMultiDict-class.html#values,Method Bio.GenBank.NCBIDictionary.values()=Bio.GenBank.NCBIDictionary-class.html#values,Method Bio.Mindy.BaseDB.DictLookup.values()=Bio.Mindy.BaseDB.DictLookup-class.html#values,Method Bio.Prosite.ExPASyDictionary.values()=Bio.Prosite.ExPASyDictionary-class.html#values,Method Bio.Prosite.Prodoc.ExPASyDictionary.values()=Bio.Prosite.Prodoc.ExPASyDictionary-class.html#values,Method Bio.PubMed.Dictionary.values()=Bio.PubMed.Dictionary-class.html#values,Method Bio.SwissProt.SProt.ExPASyDictionary.values()=Bio.SwissProt.SProt.ExPASyDictionary-class.html#values,Method Bio.config.Registry.Registry.values()=Bio.config.Registry.Registry-class.html#values,Method BioSQL.BioSeqDatabase.BioSeqDatabase.values()=BioSQL.BioSeqDatabase.BioSeqDatabase-class.html#values,Method BioSQL.BioSeqDatabase.DBServer.values()=BioSQL.BioSeqDatabase.DBServer-class.html#values,Method Martel.Parser.MartelAttributeList.values()=Martel.Parser.MartelAttributeList-class.html#values"><a title="Bio.Crystal.Crystal.values Bio.EUtils.MultiDict._BaseMultiDict.values Bio.GenBank.NCBIDictionary.values Bio.Mindy.BaseDB.DictLookup.values Bio.Prosite.ExPASyDictionary.values Bio.Prosite.Prodoc.ExPASyDictionary.values Bio.PubMed.Dictionary.values Bio.SwissProt.SProt.ExPASyDictionary.values Bio.config.Registry.Registry.values 
BioSQL.BioSeqDatabase.BioSeqDatabase.values BioSQL.BioSeqDatabase.DBServer.values Martel.Parser.MartelAttributeList.values" class="py-name" href="#" onclick="return doclink('link-21', 'values', 'link-21');">values</a></tt> <tt class="py-op">=</tt> <tt class="py-op">{</tt><tt class="py-op">}</tt> </tt> <a name="L82"></a><tt class="py-lineno"> 82</tt> <tt class="py-line"> <tt class="py-keyword">for</tt> <tt id="link-22" class="py-name"><a title="Bio.PDB.Polypeptide.i" class="py-name" href="#" onclick="return doclink('link-22', 'i', 'link-14');">i</a></tt> <tt class="py-keyword">in</tt> <tt class="py-name">range</tt><tt class="py-op">(</tt><tt class="py-name">len</tt><tt class="py-op">(</tt><tt class="py-name">xs</tt><tt class="py-op">)</tt><tt class="py-op">)</tt><tt class="py-op">:</tt> </tt> <a name="L83"></a><tt class="py-lineno"> 83</tt> <tt class="py-line"> <tt class="py-keyword">for</tt> <tt class="py-name">j</tt> <tt class="py-keyword">in</tt> <tt class="py-name">range</tt><tt class="py-op">(</tt><tt class="py-name">len</tt><tt class="py-op">(</tt><tt class="py-name">classes</tt><tt class="py-op">)</tt><tt class="py-op">)</tt><tt class="py-op">:</tt> </tt> <a name="L84"></a><tt class="py-lineno"> 84</tt> <tt class="py-line"> <tt class="py-name">f</tt> <tt class="py-op">=</tt> <tt class="py-name">fn</tt><tt class="py-op">(</tt><tt class="py-name">xs</tt><tt class="py-op">[</tt><tt id="link-23" class="py-name"><a title="Bio.PDB.Polypeptide.i" class="py-name" href="#" onclick="return doclink('link-23', 'i', 'link-14');">i</a></tt><tt class="py-op">]</tt><tt class="py-op">,</tt> <tt class="py-name">classes</tt><tt class="py-op">[</tt><tt class="py-name">j</tt><tt class="py-op">]</tt><tt class="py-op">)</tt> </tt> <a name="L85"></a><tt class="py-lineno"> 85</tt> <tt class="py-line"> <tt class="py-keyword">if</tt> <tt class="py-name">f</tt> <tt class="py-op">!=</tt> <tt class="py-number">0</tt><tt class="py-op">:</tt> </tt> <a name="L86"></a><tt class="py-lineno"> 
86</tt> <tt class="py-line"> <tt id="link-24" class="py-name"><a title="Bio.Crystal.Crystal.values Bio.EUtils.MultiDict._BaseMultiDict.values Bio.GenBank.NCBIDictionary.values Bio.Mindy.BaseDB.DictLookup.values Bio.Prosite.ExPASyDictionary.values Bio.Prosite.Prodoc.ExPASyDictionary.values Bio.PubMed.Dictionary.values Bio.SwissProt.SProt.ExPASyDictionary.values Bio.config.Registry.Registry.values BioSQL.BioSeqDatabase.BioSeqDatabase.values BioSQL.BioSeqDatabase.DBServer.values Martel.Parser.MartelAttributeList.values" class="py-name" href="#" onclick="return doclink('link-24', 'values', 'link-21');">values</a></tt><tt class="py-op">[</tt><tt class="py-op">(</tt><tt id="link-25" class="py-name"><a title="Bio.PDB.Polypeptide.i" class="py-name" href="#" onclick="return doclink('link-25', 'i', 'link-14');">i</a></tt><tt class="py-op">,</tt> <tt class="py-name">j</tt><tt class="py-op">)</tt><tt class="py-op">]</tt> <tt class="py-op">=</tt> <tt class="py-name">f</tt> </tt> <a name="L87"></a><tt class="py-lineno"> 87</tt> <tt class="py-line"> <tt class="py-keyword">return</tt> <tt id="link-26" class="py-name"><a title="Bio.Crystal.Crystal.values Bio.EUtils.MultiDict._BaseMultiDict.values Bio.GenBank.NCBIDictionary.values Bio.Mindy.BaseDB.DictLookup.values Bio.Prosite.ExPASyDictionary.values Bio.Prosite.Prodoc.ExPASyDictionary.values Bio.PubMed.Dictionary.values Bio.SwissProt.SProt.ExPASyDictionary.values Bio.config.Registry.Registry.values BioSQL.BioSeqDatabase.BioSeqDatabase.values BioSQL.BioSeqDatabase.DBServer.values Martel.Parser.MartelAttributeList.values" class="py-name" href="#" onclick="return doclink('link-26', 'values', 'link-21');">values</a></tt> </tt> </div><a name="L88"></a><tt class="py-lineno"> 88</tt> <tt class="py-line"> </tt> <a name="_calc_empirical_expects"></a><div id="_calc_empirical_expects-def"><a name="L89"></a><tt class="py-lineno"> 89</tt> <a class="py-toggle" href="#" id="_calc_empirical_expects-toggle" onclick="return 
toggle('_calc_empirical_expects');">-</a><tt class="py-line"><tt class="py-keyword">def</tt> <a class="py-def-name" href="Bio.MaxEntropy-module.html#_calc_empirical_expects">_calc_empirical_expects</a><tt class="py-op">(</tt><tt class="py-param">xs</tt><tt class="py-op">,</tt> <tt class="py-param">ys</tt><tt class="py-op">,</tt> <tt class="py-param">classes</tt><tt class="py-op">,</tt> <tt class="py-param">features</tt><tt class="py-op">)</tt><tt class="py-op">:</tt> </tt> </div><div id="_calc_empirical_expects-collapsed" style="display:none;" pad="+++" indent="++++"></div><div id="_calc_empirical_expects-expanded"><a name="L90"></a><tt class="py-lineno"> 90</tt> <tt class="py-line"> <tt class="py-docstring">"""_calc_empirical_expects(xs, ys, classes, features) -> list of expectations</tt> </tt> <a name="L91"></a><tt class="py-lineno"> 91</tt> <tt class="py-line"><tt class="py-docstring"></tt> </tt> <a name="L92"></a><tt class="py-lineno"> 92</tt> <tt class="py-line"><tt class="py-docstring"> Calculate the expectation of each function from the data. This is</tt> </tt> <a name="L93"></a><tt class="py-lineno"> 93</tt> <tt class="py-line"><tt class="py-docstring"> the constraint for the maximum entropy distribution. 
Return a</tt> </tt> <a name="L94"></a><tt class="py-lineno"> 94</tt> <tt class="py-line"><tt class="py-docstring"> list of expectations, parallel to the list of features.</tt> </tt> <a name="L95"></a><tt class="py-lineno"> 95</tt> <tt class="py-line"><tt class="py-docstring"></tt> </tt> <a name="L96"></a><tt class="py-lineno"> 96</tt> <tt class="py-line"><tt class="py-docstring"> """</tt> </tt> <a name="L97"></a><tt class="py-lineno"> 97</tt> <tt class="py-line"> <tt class="py-comment"># E[f_i] = SUM_x,y P(x, y) f(x, y)</tt> </tt> <a name="L98"></a><tt class="py-lineno"> 98</tt> <tt class="py-line"><tt class="py-comment"></tt> <tt class="py-comment"># = 1/N f(x, y)</tt> </tt> <a name="L99"></a><tt class="py-lineno"> 99</tt> <tt class="py-line"><tt class="py-comment"></tt> <tt class="py-name">class2index</tt> <tt class="py-op">=</tt> <tt id="link-27" class="py-name"><a title="Bio.listfns" class="py-name" href="#" onclick="return doclink('link-27', 'listfns', 'link-1');">listfns</a></tt><tt class="py-op">.</tt><tt id="link-28" class="py-name" targets="Function Bio.listfns.itemindex()=Bio.listfns-module.html#itemindex"><a title="Bio.listfns.itemindex" class="py-name" href="#" onclick="return doclink('link-28', 'itemindex', 'link-28');">itemindex</a></tt><tt class="py-op">(</tt><tt class="py-name">classes</tt><tt class="py-op">)</tt> </tt> <a name="L100"></a><tt class="py-lineno">100</tt> <tt class="py-line"> <tt class="py-name">ys_i</tt> <tt class="py-op">=</tt> <tt class="py-op">[</tt><tt class="py-name">class2index</tt><tt class="py-op">[</tt><tt class="py-name">y</tt><tt class="py-op">]</tt> <tt class="py-keyword">for</tt> <tt class="py-name">y</tt> <tt class="py-keyword">in</tt> <tt class="py-name">ys</tt><tt class="py-op">]</tt> </tt> <a name="L101"></a><tt class="py-lineno">101</tt> <tt class="py-line"> </tt> <a name="L102"></a><tt class="py-lineno">102</tt> <tt class="py-line"> <tt id="link-29" class="py-name" targets="Variable 
Bio.expressions.blast.ncbiblast.expect=Bio.expressions.blast.ncbiblast-module.html#expect"><a title="Bio.expressions.blast.ncbiblast.expect" class="py-name" href="#" onclick="return doclink('link-29', 'expect', 'link-29');">expect</a></tt> <tt class="py-op">=</tt> <tt class="py-op">[</tt><tt class="py-op">]</tt> </tt> <a name="L103"></a><tt class="py-lineno">103</tt> <tt class="py-line"> <tt class="py-name">N</tt> <tt class="py-op">=</tt> <tt class="py-name">len</tt><tt class="py-op">(</tt><tt class="py-name">xs</tt><tt class="py-op">)</tt> </tt> <a name="L104"></a><tt class="py-lineno">104</tt> <tt class="py-line"> <tt class="py-keyword">for</tt> <tt id="link-30" class="py-name" targets="Function Bio.Std.feature()=Bio.Std-module.html#feature,Variable Bio.expressions.embl.embl65.feature=Bio.expressions.embl.embl65-module.html#feature,Variable Bio.expressions.genbank.feature=Bio.expressions.genbank-module.html#feature"><a title="Bio.Std.feature Bio.expressions.embl.embl65.feature Bio.expressions.genbank.feature" class="py-name" href="#" onclick="return doclink('link-30', 'feature', 'link-30');">feature</a></tt> <tt class="py-keyword">in</tt> <tt id="link-31" class="py-name" targets="Method Bio.GFF.Segment.features()=Bio.GFF.Segment-class.html#features,Variable BioSQL.BioSeq.DBSeqRecord.features=BioSQL.BioSeq.DBSeqRecord-class.html#features,Method Martel.Expression.Expression.features()=Martel.Expression.Expression-class.html#features,Method Martel.Expression.ExpressionList.features()=Martel.Expression.ExpressionList-class.html#features,Method Martel.Expression.FastFeature.features()=Martel.Expression.FastFeature-class.html#features,Method Martel.Expression.Group.features()=Martel.Expression.Group-class.html#features,Method Martel.Expression.HeaderFooter.features()=Martel.Expression.HeaderFooter-class.html#features,Method Martel.Expression.MaxRepeat.features()=Martel.Expression.MaxRepeat-class.html#features,Method 
Martel.Expression.ParseRecords.features()=Martel.Expression.ParseRecords-class.html#features,Method Martel.Expression.PassThrough.features()=Martel.Expression.PassThrough-class.html#features"><a title="Bio.GFF.Segment.features BioSQL.BioSeq.DBSeqRecord.features Martel.Expression.Expression.features Martel.Expression.ExpressionList.features Martel.Expression.FastFeature.features Martel.Expression.Group.features Martel.Expression.HeaderFooter.features Martel.Expression.MaxRepeat.features Martel.Expression.ParseRecords.features Martel.Expression.PassThrough.features" class="py-name" href="#" onclick="return doclink('link-31', 'features', 'link-31');">features</a></tt><tt class="py-op">:</tt> </tt> <a name="L105"></a><tt class="py-lineno">105</tt> <tt class="py-line"> <tt id="link-32" class="py-name" targets="Variable Martel.test.test_swissprot38.s=Martel.test.test_swissprot38-module.html#s"><a title="Martel.test.test_swissprot38.s" class="py-name" href="#" onclick="return doclink('link-32', 's', 'link-32');">s</a></tt> <tt class="py-op">=</tt> <tt class="py-number">0</tt> </tt> <a name="L106"></a><tt class="py-lineno">106</tt> <tt class="py-line"> <tt class="py-keyword">for</tt> <tt id="link-33" class="py-name"><a title="Bio.PDB.Polypeptide.i" class="py-name" href="#" onclick="return doclink('link-33', 'i', 'link-14');">i</a></tt> <tt class="py-keyword">in</tt> <tt class="py-name">range</tt><tt class="py-op">(</tt><tt class="py-name">N</tt><tt class="py-op">)</tt><tt class="py-op">:</tt> </tt> <a name="L107"></a><tt class="py-lineno">107</tt> <tt class="py-line"> <tt id="link-34" class="py-name"><a title="Martel.test.test_swissprot38.s" class="py-name" href="#" onclick="return doclink('link-34', 's', 'link-32');">s</a></tt> <tt class="py-op">+=</tt> <tt id="link-35" class="py-name"><a title="Bio.Std.feature Bio.expressions.embl.embl65.feature Bio.expressions.genbank.feature" class="py-name" href="#" onclick="return doclink('link-35', 'feature', 
'link-30');">feature</a></tt><tt class="py-op">.</tt><tt id="link-36" class="py-name" targets="Method Bio.Crystal.Crystal.get()=Bio.Crystal.Crystal-class.html#get,Method Bio.Data.CodonTable.AmbiguousForwardTable.get()=Bio.Data.CodonTable.AmbiguousForwardTable-class.html#get,Method Bio.EUtils.MultiDict._BaseMultiDict.get()=Bio.EUtils.MultiDict._BaseMultiDict-class.html#get,Method Bio.EUtils.POM.ElementNode.get()=Bio.EUtils.POM.ElementNode-class.html#get,Method Bio.GenBank.NCBIDictionary.get()=Bio.GenBank.NCBIDictionary-class.html#get,Method Bio.Mindy.BaseDB.DictLookup.get()=Bio.Mindy.BaseDB.DictLookup-class.html#get,Method Bio.Prosite.ExPASyDictionary.get()=Bio.Prosite.ExPASyDictionary-class.html#get,Method Bio.Prosite.Prodoc.ExPASyDictionary.get()=Bio.Prosite.Prodoc.ExPASyDictionary-class.html#get,Method Bio.PubMed.Dictionary.get()=Bio.PubMed.Dictionary-class.html#get,Method Bio.Restriction.Restriction.RestrictionBatch.get()=Bio.Restriction.Restriction.RestrictionBatch-class.html#get,Method Bio.Restriction._Update.RestrictionCompiler.DictionaryBuilder.get()=Bio.Restriction._Update.RestrictionCompiler.DictionaryBuilder-class.html#get,Method Bio.SeqUtils.MissingTable.get()=Bio.SeqUtils.MissingTable-class.html#get,Method Bio.SwissProt.SProt.ExPASyDictionary.get()=Bio.SwissProt.SProt.ExPASyDictionary-class.html#get,Method Bio.config.DBRegistry.DBGroup.get()=Bio.config.DBRegistry.DBGroup-class.html#get,Method Bio.config.DBRegistry.DBObject.get()=Bio.config.DBRegistry.DBObject-class.html#get,Method Bio.config.Registry.Registry.get()=Bio.config.Registry.Registry-class.html#get,Method Martel.Parser.MartelAttributeList.get()=Martel.Parser.MartelAttributeList-class.html#get,Method Martel.msre_parse.Tokenizer.get()=Martel.msre_parse.Tokenizer-class.html#get"><a title="Bio.Crystal.Crystal.get Bio.Data.CodonTable.AmbiguousForwardTable.get Bio.EUtils.MultiDict._BaseMultiDict.get Bio.EUtils.POM.ElementNode.get Bio.GenBank.NCBIDictionary.get Bio.Mindy.BaseDB.DictLookup.get 
Bio.Prosite.ExPASyDictionary.get Bio.Prosite.Prodoc.ExPASyDictionary.get Bio.PubMed.Dictionary.get Bio.Restriction.Restriction.RestrictionBatch.get Bio.Restriction._Update.RestrictionCompiler.DictionaryBuilder.get Bio.SeqUtils.MissingTable.get Bio.SwissProt.SProt.ExPASyDictionary.get Bio.config.DBRegistry.DBGroup.get Bio.config.DBRegistry.DBObject.get Bio.config.Registry.Registry.get Martel.Parser.MartelAttributeList.get Martel.msre_parse.Tokenizer.get" class="py-name" href="#" onclick="return doclink('link-36', 'get', 'link-36');">get</a></tt><tt class="py-op">(</tt><tt class="py-op">(</tt><tt id="link-37" class="py-name"><a title="Bio.PDB.Polypeptide.i" class="py-name" href="#" onclick="return doclink('link-37', 'i', 'link-14');">i</a></tt><tt class="py-op">,</tt> <tt class="py-name">ys_i</tt><tt class="py-op">[</tt><tt id="link-38" class="py-name"><a title="Bio.PDB.Polypeptide.i" class="py-name" href="#" onclick="return doclink('link-38', 'i', 'link-14');">i</a></tt><tt class="py-op">]</tt><tt class="py-op">)</tt><tt class="py-op">,</tt> <tt class="py-number">0</tt><tt class="py-op">)</tt> </tt> <a name="L108"></a><tt class="py-lineno">108</tt> <tt class="py-line"> <tt id="link-39" class="py-name"><a title="Bio.expressions.blast.ncbiblast.expect" class="py-name" href="#" onclick="return doclink('link-39', 'expect', 'link-29');">expect</a></tt><tt class="py-op">.</tt><tt id="link-40" class="py-name"><a title="Bio.Crystal.Chain.append Bio.EUtils.POM.ElementNode.append Bio.EUtils.sourcegen.SourceFile.append Bio.SCOP.Raf.SeqMap.append Bio.Seq.MutableSeq.append Bio.Wise.psw.Alignment.append Bio.Wise.psw.AlignmentColumn.append Martel.msre_parse.SubPattern.append" class="py-name" href="#" onclick="return doclink('link-40', 'append', 'link-9');">append</a></tt><tt class="py-op">(</tt><tt class="py-name">float</tt><tt class="py-op">(</tt><tt id="link-41" class="py-name"><a title="Martel.test.test_swissprot38.s" class="py-name" href="#" onclick="return doclink('link-41', 
's', 'link-32');">s</a></tt><tt class="py-op">)</tt> <tt class="py-op">/</tt> <tt class="py-name">N</tt><tt class="py-op">)</tt> </tt> <a name="L109"></a><tt class="py-lineno">109</tt> <tt class="py-line"> <tt class="py-keyword">return</tt> <tt id="link-42" class="py-name"><a title="Bio.expressions.blast.ncbiblast.expect" class="py-name" href="#" onclick="return doclink('link-42', 'expect', 'link-29');">expect</a></tt> </tt> </div><a name="L110"></a><tt class="py-lineno">110</tt> <tt class="py-line"> </tt> <a name="_calc_model_expects"></a><div id="_calc_model_expects-def"><a name="L111"></a><tt class="py-lineno">111</tt> <a class="py-toggle" href="#" id="_calc_model_expects-toggle" onclick="return toggle('_calc_model_expects');">-</a><tt class="py-line"><tt class="py-keyword">def</tt> <a class="py-def-name" href="Bio.MaxEntropy-module.html#_calc_model_expects">_calc_model_expects</a><tt class="py-op">(</tt><tt class="py-param">xs</tt><tt class="py-op">,</tt> <tt class="py-param">classes</tt><tt class="py-op">,</tt> <tt class="py-param">features</tt><tt class="py-op">,</tt> <tt class="py-param">alphas</tt><tt class="py-op">)</tt><tt class="py-op">:</tt> </tt> </div><div id="_calc_model_expects-collapsed" style="display:none;" pad="+++" indent="++++"></div><div id="_calc_model_expects-expanded"><a name="L112"></a><tt class="py-lineno">112</tt> <tt class="py-line"> <tt class="py-docstring">"""_calc_model_expects(xs, classes, features, alphas) -> list of expectations.</tt> </tt> <a name="L113"></a><tt class="py-lineno">113</tt> <tt class="py-line"><tt class="py-docstring"></tt> </tt> <a name="L114"></a><tt class="py-lineno">114</tt> <tt class="py-line"><tt class="py-docstring"> Calculate the expectation of each feature from the model. 
This is</tt> </tt> <a name="L115"></a><tt class="py-lineno">115</tt> <tt class="py-line"><tt class="py-docstring"> not used in maximum entropy training, but provides a good function</tt> </tt> <a name="L116"></a><tt class="py-lineno">116</tt> <tt class="py-line"><tt class="py-docstring"> for debugging.</tt> </tt> <a name="L117"></a><tt class="py-lineno">117</tt> <tt class="py-line"><tt class="py-docstring"></tt> </tt> <a name="L118"></a><tt class="py-lineno">118</tt> <tt class="py-line"><tt class="py-docstring"> """</tt> </tt> <a name="L119"></a><tt class="py-lineno">119</tt> <tt class="py-line"> <tt class="py-comment"># SUM_X P(x) SUM_Y P(Y|X) F(X, Y)</tt> </tt> <a name="L120"></a><tt class="py-lineno">120</tt> <tt class="py-line"><tt class="py-comment"></tt> <tt class="py-comment"># = 1/N SUM_X SUM_Y P(Y|X) F(X, Y)</tt> </tt> <a name="L121"></a><tt class="py-lineno">121</tt> <tt class="py-line"><tt class="py-comment"></tt> <tt class="py-name">p_yx</tt> <tt class="py-op">=</tt> <tt id="link-43" class="py-name" targets="Function Bio.MaxEntropy._calc_p_class_given_x()=Bio.MaxEntropy-module.html#_calc_p_class_given_x"><a title="Bio.MaxEntropy._calc_p_class_given_x" class="py-name" href="#" onclick="return doclink('link-43', '_calc_p_class_given_x', 'link-43');">_calc_p_class_given_x</a></tt><tt class="py-op">(</tt><tt class="py-name">xs</tt><tt class="py-op">,</tt> <tt class="py-name">classes</tt><tt class="py-op">,</tt> <tt id="link-44" class="py-name"><a title="Bio.GFF.Segment.features BioSQL.BioSeq.DBSeqRecord.features Martel.Expression.Expression.features Martel.Expression.ExpressionList.features Martel.Expression.FastFeature.features Martel.Expression.Group.features Martel.Expression.HeaderFooter.features Martel.Expression.MaxRepeat.features Martel.Expression.ParseRecords.features Martel.Expression.PassThrough.features" class="py-name" href="#" onclick="return doclink('link-44', 'features', 'link-31');">features</a></tt><tt class="py-op">,</tt> <tt 
class="py-name">alphas</tt><tt class="py-op">)</tt> </tt> <a name="L122"></a><tt class="py-lineno">122</tt> <tt class="py-line"> </tt> <a name="L123"></a><tt class="py-lineno">123</tt> <tt class="py-line"> <tt class="py-name">expects</tt> <tt class="py-op">=</tt> <tt class="py-op">[</tt><tt class="py-op">]</tt> </tt> <a name="L124"></a><tt class="py-lineno">124</tt> <tt class="py-line"> <tt class="py-keyword">for</tt> <tt id="link-45" class="py-name"><a title="Bio.Std.feature Bio.expressions.embl.embl65.feature Bio.expressions.genbank.feature" class="py-name" href="#" onclick="return doclink('link-45', 'feature', 'link-30');">feature</a></tt> <tt class="py-keyword">in</tt> <tt id="link-46" class="py-name"><a title="Bio.GFF.Segment.features BioSQL.BioSeq.DBSeqRecord.features Martel.Expression.Expression.features Martel.Expression.ExpressionList.features Martel.Expression.FastFeature.features Martel.Expression.Group.features Martel.Expression.HeaderFooter.features Martel.Expression.MaxRepeat.features Martel.Expression.ParseRecords.features Martel.Expression.PassThrough.features" class="py-name" href="#" onclick="return doclink('link-46', 'features', 'link-31');">features</a></tt><tt class="py-op">:</tt> </tt> <a name="L125"></a><tt class="py-lineno">125</tt> <tt class="py-line"> <tt id="link-47" class="py-name" targets="Method Bio.Nexus.Nexus.StepMatrix.sum()=Bio.Nexus.Nexus.StepMatrix-class.html#sum,Function Bio.utils.sum()=Bio.utils-module.html#sum"><a title="Bio.Nexus.Nexus.StepMatrix.sum Bio.utils.sum" class="py-name" href="#" onclick="return doclink('link-47', 'sum', 'link-47');">sum</a></tt> <tt class="py-op">=</tt> <tt class="py-number">0.0</tt> </tt> <a name="L126"></a><tt class="py-lineno">126</tt> <tt class="py-line"> <tt class="py-keyword">for</tt> <tt class="py-op">(</tt><tt id="link-48" class="py-name"><a title="Bio.PDB.Polypeptide.i" class="py-name" href="#" onclick="return doclink('link-48', 'i', 'link-14');">i</a></tt><tt class="py-op">,</tt> <tt 
class="py-name">j</tt><tt class="py-op">)</tt><tt class="py-op">,</tt> <tt class="py-name">f</tt> <tt class="py-keyword">in</tt> <tt id="link-49" class="py-name"><a title="Bio.Std.feature Bio.expressions.embl.embl65.feature Bio.expressions.genbank.feature" class="py-name" href="#" onclick="return doclink('link-49', 'feature', 'link-30');">feature</a></tt><tt class="py-op">.</tt><tt id="link-50" class="py-name" targets="Method Bio.Crystal.Crystal.items()=Bio.Crystal.Crystal-class.html#items,Method Bio.EUtils.MultiDict._BaseMultiDict.items()=Bio.EUtils.MultiDict._BaseMultiDict-class.html#items,Variable Bio.Entrez.SerialSet.items=Bio.Entrez.SerialSet-module.html#items,Method Bio.GenBank.NCBIDictionary.items()=Bio.GenBank.NCBIDictionary-class.html#items,Method Bio.Mindy.BaseDB.DictLookup.items()=Bio.Mindy.BaseDB.DictLookup-class.html#items,Method Bio.Prosite.ExPASyDictionary.items()=Bio.Prosite.ExPASyDictionary-class.html#items,Method Bio.Prosite.Prodoc.ExPASyDictionary.items()=Bio.Prosite.Prodoc.ExPASyDictionary-class.html#items,Method Bio.PubMed.Dictionary.items()=Bio.PubMed.Dictionary-class.html#items,Method Bio.SwissProt.SProt.ExPASyDictionary.items()=Bio.SwissProt.SProt.ExPASyDictionary-class.html#items,Method Bio.config.Registry.Registry.items()=Bio.config.Registry.Registry-class.html#items,Function Bio.listfns.items()=Bio.listfns-module.html#items,Method BioSQL.BioSeqDatabase.BioSeqDatabase.items()=BioSQL.BioSeqDatabase.BioSeqDatabase-class.html#items,Method BioSQL.BioSeqDatabase.DBServer.items()=BioSQL.BioSeqDatabase.DBServer-class.html#items,Method Martel.Parser.MartelAttributeList.items()=Martel.Parser.MartelAttributeList-class.html#items"><a title="Bio.Crystal.Crystal.items Bio.EUtils.MultiDict._BaseMultiDict.items Bio.Entrez.SerialSet.items Bio.GenBank.NCBIDictionary.items Bio.Mindy.BaseDB.DictLookup.items Bio.Prosite.ExPASyDictionary.items Bio.Prosite.Prodoc.ExPASyDictionary.items Bio.PubMed.Dictionary.items Bio.SwissProt.SProt.ExPASyDictionary.items 
Bio.config.Registry.Registry.items Bio.listfns.items BioSQL.BioSeqDatabase.BioSeqDatabase.items BioSQL.BioSeqDatabase.DBServer.items Martel.Parser.MartelAttributeList.items" class="py-name" href="#" onclick="return doclink('link-50', 'items', 'link-50');">items</a></tt><tt class="py-op">(</tt><tt class="py-op">)</tt><tt class="py-op">:</tt> </tt> <a name="L127"></a><tt class="py-lineno">127</tt> <tt class="py-line"> <tt id="link-51" class="py-name"><a title="Bio.Nexus.Nexus.StepMatrix.sum Bio.utils.sum" class="py-name" href="#" onclick="return doclink('link-51', 'sum', 'link-47');">sum</a></tt> <tt class="py-op">+=</tt> <tt class="py-name">p_yx</tt><tt class="py-op">[</tt><tt id="link-52" class="py-name"><a title="Bio.PDB.Polypeptide.i" class="py-name" href="#" onclick="return doclink('link-52', 'i', 'link-14');">i</a></tt><tt class="py-op">]</tt><tt class="py-op">[</tt><tt class="py-name">j</tt><tt class="py-op">]</tt> <tt class="py-op">*</tt> <tt class="py-name">f</tt> </tt> <a name="L128"></a><tt class="py-lineno">128</tt> <tt class="py-line"> <tt class="py-name">expects</tt><tt class="py-op">.</tt><tt id="link-53" class="py-name"><a title="Bio.Crystal.Chain.append Bio.EUtils.POM.ElementNode.append Bio.EUtils.sourcegen.SourceFile.append Bio.SCOP.Raf.SeqMap.append Bio.Seq.MutableSeq.append Bio.Wise.psw.Alignment.append Bio.Wise.psw.AlignmentColumn.append Martel.msre_parse.SubPattern.append" class="py-name" href="#" onclick="return doclink('link-53', 'append', 'link-9');">append</a></tt><tt class="py-op">(</tt><tt id="link-54" class="py-name"><a title="Bio.Nexus.Nexus.StepMatrix.sum Bio.utils.sum" class="py-name" href="#" onclick="return doclink('link-54', 'sum', 'link-47');">sum</a></tt><tt class="py-op">/</tt><tt class="py-name">len</tt><tt class="py-op">(</tt><tt class="py-name">xs</tt><tt class="py-op">)</tt><tt class="py-op">)</tt> </tt> <a name="L129"></a><tt class="py-lineno">129</tt> <tt class="py-line"> <tt class="py-keyword">return</tt> <tt 
class="py-name">expects</tt> </tt> </div><a name="L130"></a><tt class="py-lineno">130</tt> <tt class="py-line"> </tt> <a name="_calc_p_class_given_x"></a><div id="_calc_p_class_given_x-def"><a name="L131"></a><tt class="py-lineno">131</tt> <a class="py-toggle" href="#" id="_calc_p_class_given_x-toggle" onclick="return toggle('_calc_p_class_given_x');">-</a><tt class="py-line"><tt class="py-keyword">def</tt> <a class="py-def-name" href="Bio.MaxEntropy-module.html#_calc_p_class_given_x">_calc_p_class_given_x</a><tt class="py-op">(</tt><tt class="py-param">xs</tt><tt class="py-op">,</tt> <tt class="py-param">classes</tt><tt class="py-op">,</tt> <tt class="py-param">features</tt><tt class="py-op">,</tt> <tt class="py-param">alphas</tt><tt class="py-op">)</tt><tt class="py-op">:</tt> </tt> </div><div id="_calc_p_class_given_x-collapsed" style="display:none;" pad="+++" indent="++++"></div><div id="_calc_p_class_given_x-expanded"><a name="L132"></a><tt class="py-lineno">132</tt> <tt class="py-line"> <tt class="py-docstring">"""_calc_p_class_given_x(xs, classes, features, alphas) -> matrix</tt> </tt> <a name="L133"></a><tt class="py-lineno">133</tt> <tt class="py-line"><tt class="py-docstring"></tt> </tt> <a name="L134"></a><tt class="py-lineno">134</tt> <tt class="py-line"><tt class="py-docstring"> Calculate P(y|x), where y is the class and x is an instance from</tt> </tt> <a name="L135"></a><tt class="py-lineno">135</tt> <tt class="py-line"><tt class="py-docstring"> the training set. 
Return a XSxCLASSES matrix of probabilities.</tt> </tt> <a name="L136"></a><tt class="py-lineno">136</tt> <tt class="py-line"><tt class="py-docstring"></tt> </tt> <a name="L137"></a><tt class="py-lineno">137</tt> <tt class="py-line"><tt class="py-docstring"> """</tt> </tt> <a name="L138"></a><tt class="py-lineno">138</tt> <tt class="py-line"> <tt class="py-name">prob_yx</tt> <tt class="py-op">=</tt> <tt class="py-name">zeros</tt><tt class="py-op">(</tt><tt class="py-op">(</tt><tt class="py-name">len</tt><tt class="py-op">(</tt><tt class="py-name">xs</tt><tt class="py-op">)</tt><tt class="py-op">,</tt> <tt class="py-name">len</tt><tt class="py-op">(</tt><tt class="py-name">classes</tt><tt class="py-op">)</tt><tt class="py-op">)</tt><tt class="py-op">,</tt> <tt id="link-55" class="py-name" targets="Variable Bio.Affy.CelFile.Float32=Bio.Affy.CelFile-module.html#Float32,Variable Bio.LogisticRegression.Float32=Bio.LogisticRegression-module.html#Float32,Variable Bio.MarkovModel.Float32=Bio.MarkovModel-module.html#Float32,Variable Bio.MaxEntropy.Float32=Bio.MaxEntropy-module.html#Float32,Variable Bio.NaiveBayes.Float32=Bio.NaiveBayes-module.html#Float32,Variable Bio.Statistics.lowess.Float32=Bio.Statistics.lowess-module.html#Float32,Variable Bio.distance.Float32=Bio.distance-module.html#Float32,Variable Bio.kNN.Float32=Bio.kNN-module.html#Float32"><a title="Bio.Affy.CelFile.Float32 Bio.LogisticRegression.Float32 Bio.MarkovModel.Float32 Bio.MaxEntropy.Float32 Bio.NaiveBayes.Float32 Bio.Statistics.lowess.Float32 Bio.distance.Float32 Bio.kNN.Float32" class="py-name" href="#" onclick="return doclink('link-55', 'Float32', 'link-55');">Float32</a></tt><tt class="py-op">)</tt> </tt> <a name="L139"></a><tt class="py-lineno">139</tt> <tt class="py-line"> </tt> <a name="L140"></a><tt class="py-lineno">140</tt> <tt class="py-line"> <tt class="py-comment"># Calculate log P(y, x).</tt> </tt> <a name="L141"></a><tt class="py-lineno">141</tt> <tt class="py-line"><tt 
class="py-comment"></tt> <tt class="py-keyword">for</tt> <tt id="link-56" class="py-name"><a title="Bio.Std.feature Bio.expressions.embl.embl65.feature Bio.expressions.genbank.feature" class="py-name" href="#" onclick="return doclink('link-56', 'feature', 'link-30');">feature</a></tt><tt class="py-op">,</tt> <tt class="py-name">alpha</tt> <tt class="py-keyword">in</tt> <tt id="link-57" class="py-name"><a title="Bio.GFF.FeatureAggregate.map" class="py-name" href="#" onclick="return doclink('link-57', 'map', 'link-7');">map</a></tt><tt class="py-op">(</tt><tt class="py-name">None</tt><tt class="py-op">,</tt> <tt id="link-58" class="py-name"><a title="Bio.GFF.Segment.features BioSQL.BioSeq.DBSeqRecord.features Martel.Expression.Expression.features Martel.Expression.ExpressionList.features Martel.Expression.FastFeature.features Martel.Expression.Group.features Martel.Expression.HeaderFooter.features Martel.Expression.MaxRepeat.features Martel.Expression.ParseRecords.features Martel.Expression.PassThrough.features" class="py-name" href="#" onclick="return doclink('link-58', 'features', 'link-31');">features</a></tt><tt class="py-op">,</tt> <tt class="py-name">alphas</tt><tt class="py-op">)</tt><tt class="py-op">:</tt> </tt> <a name="L142"></a><tt class="py-lineno">142</tt> <tt class="py-line"> <tt class="py-keyword">for</tt> <tt class="py-op">(</tt><tt id="link-59" class="py-name" targets="Variable Bio.MarkovModel.x=Bio.MarkovModel-module.html#x,Variable Bio.Statistics.lowess.x=Bio.Statistics.lowess-module.html#x"><a title="Bio.MarkovModel.x Bio.Statistics.lowess.x" class="py-name" href="#" onclick="return doclink('link-59', 'x', 'link-59');">x</a></tt><tt class="py-op">,</tt> <tt class="py-name">y</tt><tt class="py-op">)</tt><tt class="py-op">,</tt> <tt class="py-name">f</tt> <tt class="py-keyword">in</tt> <tt id="link-60" class="py-name"><a title="Bio.Std.feature Bio.expressions.embl.embl65.feature Bio.expressions.genbank.feature" class="py-name" href="#" 
onclick="return doclink('link-60', 'feature', 'link-30');">feature</a></tt><tt class="py-op">.</tt><tt id="link-61" class="py-name"><a title="Bio.Crystal.Crystal.items Bio.EUtils.MultiDict._BaseMultiDict.items Bio.Entrez.SerialSet.items Bio.GenBank.NCBIDictionary.items Bio.Mindy.BaseDB.DictLookup.items Bio.Prosite.ExPASyDictionary.items Bio.Prosite.Prodoc.ExPASyDictionary.items Bio.PubMed.Dictionary.items Bio.SwissProt.SProt.ExPASyDictionary.items Bio.config.Registry.Registry.items Bio.listfns.items BioSQL.BioSeqDatabase.BioSeqDatabase.items BioSQL.BioSeqDatabase.DBServer.items Martel.Parser.MartelAttributeList.items" class="py-name" href="#" onclick="return doclink('link-61', 'items', 'link-50');">items</a></tt><tt class="py-op">(</tt><tt class="py-op">)</tt><tt class="py-op">:</tt> </tt> <a name="L143"></a><tt class="py-lineno">143</tt> <tt class="py-line"> <tt class="py-name">prob_yx</tt><tt class="py-op">[</tt><tt id="link-62" class="py-name"><a title="Bio.MarkovModel.x Bio.Statistics.lowess.x" class="py-name" href="#" onclick="return doclink('link-62', 'x', 'link-59');">x</a></tt><tt class="py-op">]</tt><tt class="py-op">[</tt><tt class="py-name">y</tt><tt class="py-op">]</tt> <tt class="py-op">+=</tt> <tt class="py-name">alpha</tt> <tt class="py-op">*</tt> <tt class="py-name">f</tt> </tt> <a name="L144"></a><tt class="py-lineno">144</tt> <tt class="py-line"> <tt class="py-comment"># Take an exponent to get P(y, x)</tt> </tt> <a name="L145"></a><tt class="py-lineno">145</tt> <tt class="py-line"><tt class="py-comment"></tt> <tt class="py-name">prob_yx</tt> <tt class="py-op">=</tt> <tt id="link-63" class="py-name" targets="Variable Bio.Affy.CelFile.exp=Bio.Affy.CelFile-module.html#exp,Variable Bio.LogisticRegression.exp=Bio.LogisticRegression-module.html#exp,Variable Bio.MarkovModel.exp=Bio.MarkovModel-module.html#exp,Variable Bio.MaxEntropy.exp=Bio.MaxEntropy-module.html#exp,Variable Bio.NaiveBayes.exp=Bio.NaiveBayes-module.html#exp,Variable 
Bio.Statistics.lowess.exp=Bio.Statistics.lowess-module.html#exp,Variable Bio.distance.exp=Bio.distance-module.html#exp,Variable Bio.kNN.exp=Bio.kNN-module.html#exp"><a title="Bio.Affy.CelFile.exp Bio.LogisticRegression.exp Bio.MarkovModel.exp Bio.MaxEntropy.exp Bio.NaiveBayes.exp Bio.Statistics.lowess.exp Bio.distance.exp Bio.kNN.exp" class="py-name" href="#" onclick="return doclink('link-63', 'exp', 'link-63');">exp</a></tt><tt class="py-op">(</tt><tt class="py-name">prob_yx</tt><tt class="py-op">)</tt> </tt> <a name="L146"></a><tt class="py-lineno">146</tt> <tt class="py-line"> <tt class="py-comment"># Divide out the probability over each class, so we get P(y|x).</tt> </tt> <a name="L147"></a><tt class="py-lineno">147</tt> <tt class="py-line"><tt class="py-comment"></tt> <tt class="py-keyword">for</tt> <tt id="link-64" class="py-name"><a title="Bio.PDB.Polypeptide.i" class="py-name" href="#" onclick="return doclink('link-64', 'i', 'link-14');">i</a></tt> <tt class="py-keyword">in</tt> <tt class="py-name">range</tt><tt class="py-op">(</tt><tt class="py-name">len</tt><tt class="py-op">(</tt><tt class="py-name">xs</tt><tt class="py-op">)</tt><tt class="py-op">)</tt><tt class="py-op">:</tt> </tt> <a name="L148"></a><tt class="py-lineno">148</tt> <tt class="py-line"> <tt class="py-name">z</tt> <tt class="py-op">=</tt> <tt id="link-65" class="py-name"><a title="Bio.Nexus.Nexus.StepMatrix.sum Bio.utils.sum" class="py-name" href="#" onclick="return doclink('link-65', 'sum', 'link-47');">sum</a></tt><tt class="py-op">(</tt><tt class="py-name">prob_yx</tt><tt class="py-op">[</tt><tt id="link-66" class="py-name"><a title="Bio.PDB.Polypeptide.i" class="py-name" href="#" onclick="return doclink('link-66', 'i', 'link-14');">i</a></tt><tt class="py-op">]</tt><tt class="py-op">)</tt> </tt> <a name="L149"></a><tt class="py-lineno">149</tt> <tt class="py-line"> <tt class="py-name">prob_yx</tt><tt class="py-op">[</tt><tt id="link-67" class="py-name"><a title="Bio.PDB.Polypeptide.i" 
class="py-name" href="#" onclick="return doclink('link-67', 'i', 'link-14');">i</a></tt><tt class="py-op">]</tt> <tt class="py-op">=</tt> <tt class="py-name">prob_yx</tt><tt class="py-op">[</tt><tt id="link-68" class="py-name"><a title="Bio.PDB.Polypeptide.i" class="py-name" href="#" onclick="return doclink('link-68', 'i', 'link-14');">i</a></tt><tt class="py-op">]</tt> <tt class="py-op">/</tt> <tt class="py-name">z</tt> </tt> <a name="L150"></a><tt class="py-lineno">150</tt> <tt class="py-line"> </tt> <a name="L151"></a><tt class="py-lineno">151</tt> <tt class="py-line"> <tt class="py-comment">#prob_yx = []</tt> </tt> <a name="L152"></a><tt class="py-lineno">152</tt> <tt class="py-line"><tt class="py-comment"></tt> <tt class="py-comment">#for i in range(len(xs)):</tt> </tt> <a name="L153"></a><tt class="py-lineno">153</tt> <tt class="py-line"><tt class="py-comment"></tt> <tt class="py-comment"># z = 0.0 # Normalization factor for this x, over all classes.</tt> </tt> <a name="L154"></a><tt class="py-lineno">154</tt> <tt class="py-line"><tt class="py-comment"></tt> <tt class="py-comment"># probs = [0.0] * len(classes)</tt> </tt> <a name="L155"></a><tt class="py-lineno">155</tt> <tt class="py-line"><tt class="py-comment"></tt> <tt class="py-comment"># for j in range(len(classes)):</tt> </tt> <a name="L156"></a><tt class="py-lineno">156</tt> <tt class="py-line"><tt class="py-comment"></tt> <tt class="py-comment"># log_p = 0.0 # log of the probability of f(x, y)</tt> </tt> <a name="L157"></a><tt class="py-lineno">157</tt> <tt class="py-line"><tt class="py-comment"></tt> <tt class="py-comment"># for k in range(len(features)):</tt> </tt> <a name="L158"></a><tt class="py-lineno">158</tt> <tt class="py-line"><tt class="py-comment"></tt> <tt class="py-comment"># log_p += alphas[k] * features[k].get((i, j), 0.0)</tt> </tt> <a name="L159"></a><tt class="py-lineno">159</tt> <tt class="py-line"><tt class="py-comment"></tt> <tt class="py-comment"># probs[j] = 
math.exp(log_p)</tt> </tt> <a name="L160"></a><tt class="py-lineno">160</tt> <tt class="py-line"><tt class="py-comment"></tt> <tt class="py-comment"># z += probs[j]</tt> </tt> <a name="L161"></a><tt class="py-lineno">161</tt> <tt class="py-line"><tt class="py-comment"></tt> <tt class="py-comment"># # Normalize the probabilities for this x.</tt> </tt> <a name="L162"></a><tt class="py-lineno">162</tt> <tt class="py-line"><tt class="py-comment"></tt> <tt class="py-comment"># probs = map(lambda x, z=z: x/z, probs)</tt> </tt> <a name="L163"></a><tt class="py-lineno">163</tt> <tt class="py-line"><tt class="py-comment"></tt> <tt class="py-comment"># prob_yx.append(probs)</tt> </tt> <a name="L164"></a><tt class="py-lineno">164</tt> <tt class="py-line"><tt class="py-comment"></tt> <tt class="py-keyword">return</tt> <tt class="py-name">prob_yx</tt> </tt> </div><a name="L165"></a><tt class="py-lineno">165</tt> <tt class="py-line"> </tt> <a name="_calc_f_sharp"></a><div id="_calc_f_sharp-def"><a name="L166"></a><tt class="py-lineno">166</tt> <a class="py-toggle" href="#" id="_calc_f_sharp-toggle" onclick="return toggle('_calc_f_sharp');">-</a><tt class="py-line"><tt class="py-keyword">def</tt> <a class="py-def-name" href="Bio.MaxEntropy-module.html#_calc_f_sharp">_calc_f_sharp</a><tt class="py-op">(</tt><tt class="py-param">N</tt><tt class="py-op">,</tt> <tt class="py-param">nclasses</tt><tt class="py-op">,</tt> <tt class="py-param">features</tt><tt class="py-op">)</tt><tt class="py-op">:</tt> </tt> </div><div id="_calc_f_sharp-collapsed" style="display:none;" pad="+++" indent="++++"></div><div id="_calc_f_sharp-expanded"><a name="L167"></a><tt class="py-lineno">167</tt> <tt class="py-line"> <tt class="py-docstring">"""_calc_f_sharp(N, nclasses, features) -> matrix of f sharp values."""</tt> </tt> <a name="L168"></a><tt class="py-lineno">168</tt> <tt class="py-line"> <tt class="py-comment"># f#(x, y) = SUM_i feature(x, y)</tt> </tt> <a name="L169"></a><tt 
class="py-lineno">169</tt> <tt class="py-line"><tt class="py-comment"></tt> <tt class="py-name">f_sharp</tt> <tt class="py-op">=</tt> <tt class="py-name">zeros</tt><tt class="py-op">(</tt><tt class="py-op">(</tt><tt class="py-name">N</tt><tt class="py-op">,</tt> <tt class="py-name">nclasses</tt><tt class="py-op">)</tt><tt class="py-op">)</tt> </tt> <a name="L170"></a><tt class="py-lineno">170</tt> <tt class="py-line"> <tt class="py-keyword">for</tt> <tt id="link-69" class="py-name"><a title="Bio.Std.feature Bio.expressions.embl.embl65.feature Bio.expressions.genbank.feature" class="py-name" href="#" onclick="return doclink('link-69', 'feature', 'link-30');">feature</a></tt> <tt class="py-keyword">in</tt> <tt id="link-70" class="py-name"><a title="Bio.GFF.Segment.features BioSQL.BioSeq.DBSeqRecord.features Martel.Expression.Expression.features Martel.Expression.ExpressionList.features Martel.Expression.FastFeature.features Martel.Expression.Group.features Martel.Expression.HeaderFooter.features Martel.Expression.MaxRepeat.features Martel.Expression.ParseRecords.features Martel.Expression.PassThrough.features" class="py-name" href="#" onclick="return doclink('link-70', 'features', 'link-31');">features</a></tt><tt class="py-op">:</tt> </tt> <a name="L171"></a><tt class="py-lineno">171</tt> <tt class="py-line"> <tt class="py-keyword">for</tt> <tt class="py-op">(</tt><tt id="link-71" class="py-name"><a title="Bio.PDB.Polypeptide.i" class="py-name" href="#" onclick="return doclink('link-71', 'i', 'link-14');">i</a></tt><tt class="py-op">,</tt> <tt class="py-name">j</tt><tt class="py-op">)</tt><tt class="py-op">,</tt> <tt class="py-name">f</tt> <tt class="py-keyword">in</tt> <tt id="link-72" class="py-name"><a title="Bio.Std.feature Bio.expressions.embl.embl65.feature Bio.expressions.genbank.feature" class="py-name" href="#" onclick="return doclink('link-72', 'feature', 'link-30');">feature</a></tt><tt class="py-op">.</tt><tt id="link-73" class="py-name"><a 
title="Bio.Crystal.Crystal.items Bio.EUtils.MultiDict._BaseMultiDict.items Bio.Entrez.SerialSet.items Bio.GenBank.NCBIDictionary.items Bio.Mindy.BaseDB.DictLookup.items Bio.Prosite.ExPASyDictionary.items Bio.Prosite.Prodoc.ExPASyDictionary.items Bio.PubMed.Dictionary.items Bio.SwissProt.SProt.ExPASyDictionary.items Bio.config.Registry.Registry.items Bio.listfns.items BioSQL.BioSeqDatabase.BioSeqDatabase.items BioSQL.BioSeqDatabase.DBServer.items Martel.Parser.MartelAttributeList.items" class="py-name" href="#" onclick="return doclink('link-73', 'items', 'link-50');">items</a></tt><tt class="py-op">(</tt><tt class="py-op">)</tt><tt class="py-op">:</tt> </tt> <a name="L172"></a><tt class="py-lineno">172</tt> <tt class="py-line"> <tt class="py-name">f_sharp</tt><tt class="py-op">[</tt><tt id="link-74" class="py-name"><a title="Bio.PDB.Polypeptide.i" class="py-name" href="#" onclick="return doclink('link-74', 'i', 'link-14');">i</a></tt><tt class="py-op">]</tt><tt class="py-op">[</tt><tt class="py-name">j</tt><tt class="py-op">]</tt> <tt class="py-op">+=</tt> <tt class="py-name">f</tt> </tt> <a name="L173"></a><tt class="py-lineno">173</tt> <tt class="py-line"> <tt class="py-keyword">return</tt> <tt class="py-name">f_sharp</tt> </tt> </div><a name="L174"></a><tt class="py-lineno">174</tt> <tt class="py-line"> </tt> <a name="_iis_solve_delta"></a><div id="_iis_solve_delta-def"><a name="L175"></a><tt class="py-lineno">175</tt> <a class="py-toggle" href="#" id="_iis_solve_delta-toggle" onclick="return toggle('_iis_solve_delta');">-</a><tt class="py-line"><tt class="py-keyword">def</tt> <a class="py-def-name" href="Bio.MaxEntropy-module.html#_iis_solve_delta">_iis_solve_delta</a><tt class="py-op">(</tt><tt class="py-param">N</tt><tt class="py-op">,</tt> <tt class="py-param">feature</tt><tt class="py-op">,</tt> <tt class="py-param">f_sharp</tt><tt class="py-op">,</tt> <tt class="py-param">empirical</tt><tt class="py-op">,</tt> <tt class="py-param">prob_yx</tt><tt 
class="py-op">)</tt><tt class="py-op">:</tt> </tt> </div><div id="_iis_solve_delta-collapsed" style="display:none;" pad="+++" indent="++++"></div><div id="_iis_solve_delta-expanded"><a name="L176"></a><tt class="py-lineno">176</tt> <tt class="py-line"> <tt class="py-comment"># Solve delta using Newton's method for:</tt> </tt> <a name="L177"></a><tt class="py-lineno">177</tt> <tt class="py-line"><tt class="py-comment"></tt> <tt class="py-comment"># SUM_x P(x) * SUM_c P(c|x) f_i(x, c) e^[delta_i * f#(x, c)] = 0</tt> </tt> <a name="L178"></a><tt class="py-lineno">178</tt> <tt class="py-line"><tt class="py-comment"></tt> <tt class="py-name">delta</tt> <tt class="py-op">=</tt> <tt class="py-number">0.0</tt> </tt> <a name="L179"></a><tt class="py-lineno">179</tt> <tt class="py-line"> <tt class="py-name">iters</tt> <tt class="py-op">=</tt> <tt class="py-number">0</tt> </tt> <a name="L180"></a><tt class="py-lineno">180</tt> <tt class="py-line"> <tt class="py-keyword">while</tt> <tt class="py-name">iters</tt> <tt class="py-op">&lt;</tt> <tt id="link-75" class="py-name"><a title="Bio.MaxEntropy.MAX_NEWTON_ITERATIONS" class="py-name" href="#" onclick="return doclink('link-75', 'MAX_NEWTON_ITERATIONS', 'link-4');">MAX_NEWTON_ITERATIONS</a></tt><tt class="py-op">:</tt> <tt class="py-comment"># iterate for Newton's method</tt> </tt> <a name="L181"></a><tt class="py-lineno">181</tt> <tt class="py-line"> <tt class="py-name">f_newton</tt> <tt class="py-op">=</tt> <tt class="py-name">df_newton</tt> <tt class="py-op">=</tt> <tt class="py-number">0.0</tt> <tt class="py-comment"># evaluate the function and derivative</tt> </tt> <a name="L182"></a><tt class="py-lineno">182</tt> <tt class="py-line"> <tt class="py-keyword">for</tt> <tt class="py-op">(</tt><tt id="link-76" class="py-name"><a title="Bio.PDB.Polypeptide.i" class="py-name" href="#" onclick="return doclink('link-76', 'i', 'link-14');">i</a></tt><tt class="py-op">,</tt> <tt class="py-name">j</tt><tt class="py-op">)</tt><tt 
class="py-op">,</tt> <tt class="py-name">f</tt> <tt class="py-keyword">in</tt> <tt id="link-77" class="py-name"><a title="Bio.Std.feature Bio.expressions.embl.embl65.feature Bio.expressions.genbank.feature" class="py-name" href="#" onclick="return doclink('link-77', 'feature', 'link-30');">feature</a></tt><tt class="py-op">.</tt><tt id="link-78" class="py-name"><a title="Bio.Crystal.Crystal.items Bio.EUtils.MultiDict._BaseMultiDict.items Bio.Entrez.SerialSet.items Bio.GenBank.NCBIDictionary.items Bio.Mindy.BaseDB.DictLookup.items Bio.Prosite.ExPASyDictionary.items Bio.Prosite.Prodoc.ExPASyDictionary.items Bio.PubMed.Dictionary.items Bio.SwissProt.SProt.ExPASyDictionary.items Bio.config.Registry.Registry.items Bio.listfns.items BioSQL.BioSeqDatabase.BioSeqDatabase.items BioSQL.BioSeqDatabase.DBServer.items Martel.Parser.MartelAttributeList.items" class="py-name" href="#" onclick="return doclink('link-78', 'items', 'link-50');">items</a></tt><tt class="py-op">(</tt><tt class="py-op">)</tt><tt class="py-op">:</tt> </tt> <a name="L183"></a><tt class="py-lineno">183</tt> <tt class="py-line"> <tt class="py-name">prod</tt> <tt class="py-op">=</tt> <tt class="py-name">prob_yx</tt><tt class="py-op">[</tt><tt id="link-79" class="py-name"><a title="Bio.PDB.Polypeptide.i" class="py-name" href="#" onclick="return doclink('link-79', 'i', 'link-14');">i</a></tt><tt class="py-op">]</tt><tt class="py-op">[</tt><tt class="py-name">j</tt><tt class="py-op">]</tt> <tt class="py-op">*</tt> <tt class="py-name">f</tt> <tt class="py-op">*</tt> <tt class="py-name">math</tt><tt class="py-op">.</tt><tt id="link-80" class="py-name"><a title="Bio.Affy.CelFile.exp Bio.LogisticRegression.exp Bio.MarkovModel.exp Bio.MaxEntropy.exp Bio.NaiveBayes.exp Bio.Statistics.lowess.exp Bio.distance.exp Bio.kNN.exp" class="py-name" href="#" onclick="return doclink('link-80', 'exp', 'link-63');">exp</a></tt><tt class="py-op">(</tt><tt class="py-name">delta</tt> <tt class="py-op">*</tt> <tt 
class="py-name">f_sharp</tt><tt class="py-op">[</tt><tt id="link-81" class="py-name"><a title="Bio.PDB.Polypeptide.i" class="py-name" href="#" onclick="return doclink('link-81', 'i', 'link-14');">i</a></tt><tt class="py-op">]</tt><tt class="py-op">[</tt><tt class="py-name">j</tt><tt class="py-op">]</tt><tt class="py-op">)</tt> </tt> <a name="L184"></a><tt class="py-lineno">184</tt> <tt class="py-line"> <tt class="py-name">f_newton</tt> <tt class="py-op">+=</tt> <tt class="py-name">prod</tt> </tt> <a name="L185"></a><tt class="py-lineno">185</tt> <tt class="py-line"> <tt class="py-name">df_newton</tt> <tt class="py-op">+=</tt> <tt class="py-name">prod</tt> <tt class="py-op">*</tt> <tt class="py-name">f_sharp</tt><tt class="py-op">[</tt><tt id="link-82" class="py-name"><a title="Bio.PDB.Polypeptide.i" class="py-name" href="#" onclick="return doclink('link-82', 'i', 'link-14');">i</a></tt><tt class="py-op">]</tt><tt class="py-op">[</tt><tt class="py-name">j</tt><tt class="py-op">]</tt> </tt> <a name="L186"></a><tt class="py-lineno">186</tt> <tt class="py-line"> <tt class="py-name">f_newton</tt><tt class="py-op">,</tt> <tt class="py-name">df_newton</tt> <tt class="py-op">=</tt> <tt class="py-name">empirical</tt> <tt class="py-op">-</tt> <tt class="py-name">f_newton</tt> <tt class="py-op">/</tt> <tt class="py-name">N</tt><tt class="py-op">,</tt> <tt class="py-op">-</tt><tt class="py-name">df_newton</tt> <tt class="py-op">/</tt> <tt class="py-name">N</tt> </tt> <a name="L187"></a><tt class="py-lineno">187</tt> <tt class="py-line"> </tt> <a name="L188"></a><tt class="py-lineno">188</tt> <tt class="py-line"> <tt class="py-name">ratio</tt> <tt class="py-op">=</tt> <tt class="py-name">f_newton</tt> <tt class="py-op">/</tt> <tt class="py-name">df_newton</tt> </tt> <a name="L189"></a><tt class="py-lineno">189</tt> <tt class="py-line"> <tt class="py-name">delta</tt> <tt class="py-op">-=</tt> <tt class="py-name">ratio</tt> </tt> <a name="L190"></a><tt class="py-lineno">190</tt> 
<tt class="py-line"> <tt class="py-keyword">if</tt> <tt class="py-name">math</tt><tt class="py-op">.</tt><tt id="link-83" class="py-name" targets="Variable Bio.Affy.CelFile.fabs=Bio.Affy.CelFile-module.html#fabs,Variable Bio.LogisticRegression.fabs=Bio.LogisticRegression-module.html#fabs,Variable Bio.MarkovModel.fabs=Bio.MarkovModel-module.html#fabs,Variable Bio.MaxEntropy.fabs=Bio.MaxEntropy-module.html#fabs,Variable Bio.NaiveBayes.fabs=Bio.NaiveBayes-module.html#fabs,Variable Bio.Statistics.lowess.fabs=Bio.Statistics.lowess-module.html#fabs,Variable Bio.distance.fabs=Bio.distance-module.html#fabs,Variable Bio.kNN.fabs=Bio.kNN-module.html#fabs"><a title="Bio.Affy.CelFile.fabs Bio.LogisticRegression.fabs Bio.MarkovModel.fabs Bio.MaxEntropy.fabs Bio.NaiveBayes.fabs Bio.Statistics.lowess.fabs Bio.distance.fabs Bio.kNN.fabs" class="py-name" href="#" onclick="return doclink('link-83', 'fabs', 'link-83');">fabs</a></tt><tt class="py-op">(</tt><tt class="py-name">ratio</tt><tt class="py-op">)</tt> <tt class="py-op">&lt;</tt> <tt id="link-84" class="py-name"><a title="Bio.MaxEntropy.NEWTON_CONVERGE" class="py-name" href="#" onclick="return doclink('link-84', 'NEWTON_CONVERGE', 'link-5');">NEWTON_CONVERGE</a></tt><tt class="py-op">:</tt> <tt class="py-comment"># converged</tt> </tt> <a name="L191"></a><tt class="py-lineno">191</tt> <tt class="py-line"> <tt class="py-keyword">break</tt> </tt> <a name="L192"></a><tt class="py-lineno">192</tt> <tt class="py-line"> <tt class="py-name">iters</tt> <tt class="py-op">=</tt> <tt class="py-name">iters</tt> <tt class="py-op">+</tt> <tt class="py-number">1</tt> </tt> <a name="L193"></a><tt class="py-lineno">193</tt> <tt class="py-line"> <tt class="py-keyword">else</tt><tt class="py-op">:</tt> </tt> <a name="L194"></a><tt class="py-lineno">194</tt> <tt class="py-line"> <tt class="py-keyword">raise</tt> <tt class="py-string">"Newton's method did not converge"</tt> </tt> <a name="L195"></a><tt class="py-lineno">195</tt> <tt class="py-line"> 
<tt class="py-keyword">return</tt> <tt class="py-name">delta</tt> </tt> </div><a name="L196"></a><tt class="py-lineno">196</tt> <tt class="py-line"> </tt> <a name="_train_iis"></a><div id="_train_iis-def"><a name="L197"></a><tt class="py-lineno">197</tt> <a class="py-toggle" href="#" id="_train_iis-toggle" onclick="return toggle('_train_iis');">-</a><tt class="py-line"><tt class="py-keyword">def</tt> <a class="py-def-name" href="Bio.MaxEntropy-module.html#_train_iis">_train_iis</a><tt class="py-op">(</tt><tt class="py-param">xs</tt><tt class="py-op">,</tt> <tt class="py-param">classes</tt><tt class="py-op">,</tt> <tt class="py-param">features</tt><tt class="py-op">,</tt> <tt class="py-param">f_sharp</tt><tt class="py-op">,</tt> <tt class="py-param">alphas</tt><tt class="py-op">,</tt> <tt class="py-param">e_empirical</tt><tt class="py-op">)</tt><tt class="py-op">:</tt> </tt> </div><div id="_train_iis-collapsed" style="display:none;" pad="+++" indent="++++"></div><div id="_train_iis-expanded"><a name="L198"></a><tt class="py-lineno">198</tt> <tt class="py-line"> <tt class="py-comment"># Do one iteration of hill climbing to find better alphas.</tt> </tt> <a name="L199"></a><tt class="py-lineno">199</tt> <tt class="py-line"><tt class="py-comment"></tt> <tt class="py-comment"># This is a good function to parallelize.</tt> </tt> <a name="L200"></a><tt class="py-lineno">200</tt> <tt class="py-line"><tt class="py-comment"></tt> </tt> <a name="L201"></a><tt class="py-lineno">201</tt> <tt class="py-line"> <tt class="py-comment"># Pre-calculate P(y|x)</tt> </tt> <a name="L202"></a><tt class="py-lineno">202</tt> <tt class="py-line"><tt class="py-comment"></tt> <tt class="py-name">p_yx</tt> <tt class="py-op">=</tt> <tt id="link-85" class="py-name"><a title="Bio.MaxEntropy._calc_p_class_given_x" class="py-name" href="#" onclick="return doclink('link-85', '_calc_p_class_given_x', 'link-43');">_calc_p_class_given_x</a></tt><tt class="py-op">(</tt><tt class="py-name">xs</tt><tt 
class="py-op">,</tt> <tt class="py-name">classes</tt><tt class="py-op">,</tt> <tt id="link-86" class="py-name"><a title="Bio.GFF.Segment.features BioSQL.BioSeq.DBSeqRecord.features Martel.Expression.Expression.features Martel.Expression.ExpressionList.features Martel.Expression.FastFeature.features Martel.Expression.Group.features Martel.Expression.HeaderFooter.features Martel.Expression.MaxRepeat.features Martel.Expression.ParseRecords.features Martel.Expression.PassThrough.features" class="py-name" href="#" onclick="return doclink('link-86', 'features', 'link-31');">features</a></tt><tt class="py-op">,</tt> <tt class="py-name">alphas</tt><tt class="py-op">)</tt> </tt> <a name="L203"></a><tt class="py-lineno">203</tt> <tt class="py-line"> </tt> <a name="L204"></a><tt class="py-lineno">204</tt> <tt class="py-line"> <tt class="py-name">N</tt> <tt class="py-op">=</tt> <tt class="py-name">len</tt><tt class="py-op">(</tt><tt class="py-name">xs</tt><tt class="py-op">)</tt> </tt> <a name="L205"></a><tt class="py-lineno">205</tt> <tt class="py-line"> <tt class="py-name">newalphas</tt> <tt class="py-op">=</tt> <tt class="py-name">alphas</tt><tt class="py-op">[</tt><tt class="py-op">:</tt><tt class="py-op">]</tt> </tt> <a name="L206"></a><tt class="py-lineno">206</tt> <tt class="py-line"> <tt class="py-keyword">for</tt> <tt id="link-87" class="py-name"><a title="Bio.PDB.Polypeptide.i" class="py-name" href="#" onclick="return doclink('link-87', 'i', 'link-14');">i</a></tt> <tt class="py-keyword">in</tt> <tt class="py-name">range</tt><tt class="py-op">(</tt><tt class="py-name">len</tt><tt class="py-op">(</tt><tt class="py-name">alphas</tt><tt class="py-op">)</tt><tt class="py-op">)</tt><tt class="py-op">:</tt> </tt> <a name="L207"></a><tt class="py-lineno">207</tt> <tt class="py-line"> <tt class="py-name">delta</tt> <tt class="py-op">=</tt> <tt id="link-88" class="py-name" targets="Function Bio.MaxEntropy._iis_solve_delta()=Bio.MaxEntropy-module.html#_iis_solve_delta"><a 
title="Bio.MaxEntropy._iis_solve_delta" class="py-name" href="#" onclick="return doclink('link-88', '_iis_solve_delta', 'link-88');">_iis_solve_delta</a></tt><tt class="py-op">(</tt><tt class="py-name">N</tt><tt class="py-op">,</tt> <tt id="link-89" class="py-name"><a title="Bio.GFF.Segment.features BioSQL.BioSeq.DBSeqRecord.features Martel.Expression.Expression.features Martel.Expression.ExpressionList.features Martel.Expression.FastFeature.features Martel.Expression.Group.features Martel.Expression.HeaderFooter.features Martel.Expression.MaxRepeat.features Martel.Expression.ParseRecords.features Martel.Expression.PassThrough.features" class="py-name" href="#" onclick="return doclink('link-89', 'features', 'link-31');">features</a></tt><tt class="py-op">[</tt><tt id="link-90" class="py-name"><a title="Bio.PDB.Polypeptide.i" class="py-name" href="#" onclick="return doclink('link-90', 'i', 'link-14');">i</a></tt><tt class="py-op">]</tt><tt class="py-op">,</tt> <tt class="py-name">f_sharp</tt><tt class="py-op">,</tt> <tt class="py-name">e_empirical</tt><tt class="py-op">[</tt><tt id="link-91" class="py-name"><a title="Bio.PDB.Polypeptide.i" class="py-name" href="#" onclick="return doclink('link-91', 'i', 'link-14');">i</a></tt><tt class="py-op">]</tt><tt class="py-op">,</tt> <tt class="py-name">p_yx</tt><tt class="py-op">)</tt> </tt> <a name="L208"></a><tt class="py-lineno">208</tt> <tt class="py-line"> <tt class="py-name">newalphas</tt><tt class="py-op">[</tt><tt id="link-92" class="py-name"><a title="Bio.PDB.Polypeptide.i" class="py-name" href="#" onclick="return doclink('link-92', 'i', 'link-14');">i</a></tt><tt class="py-op">]</tt> <tt class="py-op">+=</tt> <tt class="py-name">delta</tt> </tt> <a name="L209"></a><tt class="py-lineno">209</tt> <tt class="py-line"> <tt class="py-keyword">return</tt> <tt class="py-name">newalphas</tt> </tt> </div><a name="L210"></a><tt class="py-lineno">210</tt> <tt class="py-line"> </tt> <a name="L211"></a><tt 
class="py-lineno">211</tt> <tt class="py-line"> </tt> <a name="train"></a><div id="train-def"><a name="L212"></a><tt class="py-lineno">212</tt> <a class="py-toggle" href="#" id="train-toggle" onclick="return toggle('train');">-</a><tt class="py-line"><tt class="py-keyword">def</tt> <a class="py-def-name" href="Bio.MaxEntropy-module.html#train">train</a><tt class="py-op">(</tt><tt class="py-param">training_set</tt><tt class="py-op">,</tt> <tt class="py-param">results</tt><tt class="py-op">,</tt> <tt class="py-param">feature_fns</tt><tt class="py-op">,</tt> <tt class="py-param">update_fn</tt><tt class="py-op">=</tt><tt class="py-name">None</tt><tt class="py-op">)</tt><tt class="py-op">:</tt> </tt> </div><div id="train-collapsed" style="display:none;" pad="+++" indent="++++"></div><div id="train-expanded"><a name="L213"></a><tt class="py-lineno">213</tt> <tt class="py-line"> <tt class="py-docstring">"""train(training_set, results, feature_fns[, update_fn]) -> MaxEntropy object</tt> </tt> <a name="L214"></a><tt class="py-lineno">214</tt> <tt class="py-line"><tt class="py-docstring"></tt> </tt> <a name="L215"></a><tt class="py-lineno">215</tt> <tt class="py-line"><tt class="py-docstring"> Train a maximum entropy classifier on a training set.</tt> </tt> <a name="L216"></a><tt class="py-lineno">216</tt> <tt class="py-line"><tt class="py-docstring"> training_set is a list of observations. results is a list of the</tt> </tt> <a name="L217"></a><tt class="py-lineno">217</tt> <tt class="py-line"><tt class="py-docstring"> class assignments for each observation. feature_fns is a list of</tt> </tt> <a name="L218"></a><tt class="py-lineno">218</tt> <tt class="py-line"><tt class="py-docstring"> the features. These are callback functions that take an</tt> </tt> <a name="L219"></a><tt class="py-lineno">219</tt> <tt class="py-line"><tt class="py-docstring"> observation and class and return a 1 or 0. 
update_fn is a</tt> </tt> <a name="L220"></a><tt class="py-lineno">220</tt> <tt class="py-line"><tt class="py-docstring"> callback function that's called at each training iteration. It is</tt> </tt> <a name="L221"></a><tt class="py-lineno">221</tt> <tt class="py-line"><tt class="py-docstring"> passed a MaxEntropy object that encapsulates the current state of</tt> </tt> <a name="L222"></a><tt class="py-lineno">222</tt> <tt class="py-line"><tt class="py-docstring"> the training.</tt> </tt> <a name="L223"></a><tt class="py-lineno">223</tt> <tt class="py-line"><tt class="py-docstring"> </tt> </tt> <a name="L224"></a><tt class="py-lineno">224</tt> <tt class="py-line"><tt class="py-docstring"> """</tt> </tt> <a name="L225"></a><tt class="py-lineno">225</tt> <tt class="py-line"> <tt class="py-keyword">if</tt> <tt class="py-keyword">not</tt> <tt class="py-name">len</tt><tt class="py-op">(</tt><tt class="py-name">training_set</tt><tt class="py-op">)</tt><tt class="py-op">:</tt> </tt> <a name="L226"></a><tt class="py-lineno">226</tt> <tt class="py-line"> <tt class="py-keyword">raise</tt> <tt class="py-name">ValueError</tt><tt class="py-op">,</tt> <tt class="py-string">"No data in the training set."</tt> </tt> <a name="L227"></a><tt class="py-lineno">227</tt> <tt class="py-line"> <tt class="py-keyword">if</tt> <tt class="py-name">len</tt><tt class="py-op">(</tt><tt class="py-name">training_set</tt><tt class="py-op">)</tt> <tt class="py-op">!=</tt> <tt class="py-name">len</tt><tt class="py-op">(</tt><tt class="py-name">results</tt><tt class="py-op">)</tt><tt class="py-op">:</tt> </tt> <a name="L228"></a><tt class="py-lineno">228</tt> <tt class="py-line"> <tt class="py-keyword">raise</tt> <tt class="py-name">ValueError</tt><tt class="py-op">,</tt> <tt class="py-string">"training_set and results should be parallel lists."</tt> </tt> <a name="L229"></a><tt class="py-lineno">229</tt> <tt class="py-line"> </tt> <a name="L230"></a><tt class="py-lineno">230</tt> <tt class="py-line"> 
<tt class="py-comment"># Rename variables for convenience.</tt> </tt> <a name="L231"></a><tt class="py-lineno">231</tt> <tt class="py-line"><tt class="py-comment"></tt> <tt class="py-name">xs</tt><tt class="py-op">,</tt> <tt class="py-name">ys</tt> <tt class="py-op">=</tt> <tt class="py-name">training_set</tt><tt class="py-op">,</tt> <tt class="py-name">results</tt> </tt> <a name="L232"></a><tt class="py-lineno">232</tt> <tt class="py-line"> </tt> <a name="L233"></a><tt class="py-lineno">233</tt> <tt class="py-line"> <tt class="py-comment"># Get a list of all the classes that need to be trained.</tt> </tt> <a name="L234"></a><tt class="py-lineno">234</tt> <tt class="py-line"><tt class="py-comment"></tt> <tt class="py-name">classes</tt> <tt class="py-op">=</tt> <tt id="link-93" class="py-name"><a title="Bio.listfns" class="py-name" href="#" onclick="return doclink('link-93', 'listfns', 'link-1');">listfns</a></tt><tt class="py-op">.</tt><tt id="link-94" class="py-name"><a title="Bio.Crystal.Crystal.items Bio.EUtils.MultiDict._BaseMultiDict.items Bio.Entrez.SerialSet.items Bio.GenBank.NCBIDictionary.items Bio.Mindy.BaseDB.DictLookup.items Bio.Prosite.ExPASyDictionary.items Bio.Prosite.Prodoc.ExPASyDictionary.items Bio.PubMed.Dictionary.items Bio.SwissProt.SProt.ExPASyDictionary.items Bio.config.Registry.Registry.items Bio.listfns.items BioSQL.BioSeqDatabase.BioSeqDatabase.items BioSQL.BioSeqDatabase.DBServer.items Martel.Parser.MartelAttributeList.items" class="py-name" href="#" onclick="return doclink('link-94', 'items', 'link-50');">items</a></tt><tt class="py-op">(</tt><tt class="py-name">results</tt><tt class="py-op">)</tt> </tt> <a name="L235"></a><tt class="py-lineno">235</tt> <tt class="py-line"> <tt class="py-name">classes</tt><tt class="py-op">.</tt><tt id="link-95" class="py-name" targets="Method Bio.PDB.Residue.DisorderedResidue.sort()=Bio.PDB.Residue.DisorderedResidue-class.html#sort,Method 
Bio.PDB.Residue.Residue.sort()=Bio.PDB.Residue.Residue-class.html#sort,Method Bio.Sequencing.Ace.ACEFileRecord.sort()=Bio.Sequencing.Ace.ACEFileRecord-class.html#sort"><a title="Bio.PDB.Residue.DisorderedResidue.sort Bio.PDB.Residue.Residue.sort Bio.Sequencing.Ace.ACEFileRecord.sort" class="py-name" href="#" onclick="return doclink('link-95', 'sort', 'link-95');">sort</a></tt><tt class="py-op">(</tt><tt class="py-op">)</tt> </tt> <a name="L236"></a><tt class="py-lineno">236</tt> <tt class="py-line"> </tt> <a name="L237"></a><tt class="py-lineno">237</tt> <tt class="py-line"> <tt class="py-comment"># Cache values for all features.</tt> </tt> <a name="L238"></a><tt class="py-lineno">238</tt> <tt class="py-line"><tt class="py-comment"></tt> <tt id="link-96" class="py-name"><a title="Bio.GFF.Segment.features BioSQL.BioSeq.DBSeqRecord.features Martel.Expression.Expression.features Martel.Expression.ExpressionList.features Martel.Expression.FastFeature.features Martel.Expression.Group.features Martel.Expression.HeaderFooter.features Martel.Expression.MaxRepeat.features Martel.Expression.ParseRecords.features Martel.Expression.PassThrough.features" class="py-name" href="#" onclick="return doclink('link-96', 'features', 'link-31');">features</a></tt> <tt class="py-op">=</tt> <tt class="py-op">[</tt><tt id="link-97" class="py-name" targets="Function Bio.MaxEntropy._eval_feature_fn()=Bio.MaxEntropy-module.html#_eval_feature_fn"><a title="Bio.MaxEntropy._eval_feature_fn" class="py-name" href="#" onclick="return doclink('link-97', '_eval_feature_fn', 'link-97');">_eval_feature_fn</a></tt><tt class="py-op">(</tt><tt class="py-name">fn</tt><tt class="py-op">,</tt> <tt class="py-name">training_set</tt><tt class="py-op">,</tt> <tt class="py-name">classes</tt><tt class="py-op">)</tt> </tt> <a name="L239"></a><tt class="py-lineno">239</tt> <tt class="py-line"> <tt class="py-keyword">for</tt> <tt class="py-name">fn</tt> <tt class="py-keyword">in</tt> <tt 
class="py-name">feature_fns</tt><tt class="py-op">]</tt> </tt> <a name="L240"></a><tt class="py-lineno">240</tt> <tt class="py-line"> <tt class="py-comment"># Cache values for f#.</tt> </tt> <a name="L241"></a><tt class="py-lineno">241</tt> <tt class="py-line"><tt class="py-comment"></tt> <tt class="py-name">f_sharp</tt> <tt class="py-op">=</tt> <tt id="link-98" class="py-name" targets="Function Bio.MaxEntropy._calc_f_sharp()=Bio.MaxEntropy-module.html#_calc_f_sharp"><a title="Bio.MaxEntropy._calc_f_sharp" class="py-name" href="#" onclick="return doclink('link-98', '_calc_f_sharp', 'link-98');">_calc_f_sharp</a></tt><tt class="py-op">(</tt><tt class="py-name">len</tt><tt class="py-op">(</tt><tt class="py-name">training_set</tt><tt class="py-op">)</tt><tt class="py-op">,</tt> <tt class="py-name">len</tt><tt class="py-op">(</tt><tt class="py-name">classes</tt><tt class="py-op">)</tt><tt class="py-op">,</tt> <tt id="link-99" class="py-name"><a title="Bio.GFF.Segment.features BioSQL.BioSeq.DBSeqRecord.features Martel.Expression.Expression.features Martel.Expression.ExpressionList.features Martel.Expression.FastFeature.features Martel.Expression.Group.features Martel.Expression.HeaderFooter.features Martel.Expression.MaxRepeat.features Martel.Expression.ParseRecords.features Martel.Expression.PassThrough.features" class="py-name" href="#" onclick="return doclink('link-99', 'features', 'link-31');">features</a></tt><tt class="py-op">)</tt> </tt> <a name="L242"></a><tt class="py-lineno">242</tt> <tt class="py-line"> </tt> <a name="L243"></a><tt class="py-lineno">243</tt> <tt class="py-line"> <tt class="py-comment"># Pre-calculate the empirical expectations of the features.</tt> </tt> <a name="L244"></a><tt class="py-lineno">244</tt> <tt class="py-line"><tt class="py-comment"></tt> <tt class="py-name">e_empirical</tt> <tt class="py-op">=</tt> <tt id="link-100" class="py-name" targets="Function 
Bio.MaxEntropy._calc_empirical_expects()=Bio.MaxEntropy-module.html#_calc_empirical_expects"><a title="Bio.MaxEntropy._calc_empirical_expects" class="py-name" href="#" onclick="return doclink('link-100', '_calc_empirical_expects', 'link-100');">_calc_empirical_expects</a></tt><tt class="py-op">(</tt><tt class="py-name">xs</tt><tt class="py-op">,</tt> <tt class="py-name">ys</tt><tt class="py-op">,</tt> <tt class="py-name">classes</tt><tt class="py-op">,</tt> <tt id="link-101" class="py-name"><a title="Bio.GFF.Segment.features BioSQL.BioSeq.DBSeqRecord.features Martel.Expression.Expression.features Martel.Expression.ExpressionList.features Martel.Expression.FastFeature.features Martel.Expression.Group.features Martel.Expression.HeaderFooter.features Martel.Expression.MaxRepeat.features Martel.Expression.ParseRecords.features Martel.Expression.PassThrough.features" class="py-name" href="#" onclick="return doclink('link-101', 'features', 'link-31');">features</a></tt><tt class="py-op">)</tt> </tt> <a name="L245"></a><tt class="py-lineno">245</tt> <tt class="py-line"> </tt> <a name="L246"></a><tt class="py-lineno">246</tt> <tt class="py-line"> <tt class="py-comment"># Now train the alpha parameters to weigh each feature.</tt> </tt> <a name="L247"></a><tt class="py-lineno">247</tt> <tt class="py-line"><tt class="py-comment"></tt> <tt class="py-name">alphas</tt> <tt class="py-op">=</tt> <tt class="py-op">[</tt><tt class="py-number">0.0</tt><tt class="py-op">]</tt> <tt class="py-op">*</tt> <tt class="py-name">len</tt><tt class="py-op">(</tt><tt id="link-102" class="py-name"><a title="Bio.GFF.Segment.features BioSQL.BioSeq.DBSeqRecord.features Martel.Expression.Expression.features Martel.Expression.ExpressionList.features Martel.Expression.FastFeature.features Martel.Expression.Group.features Martel.Expression.HeaderFooter.features Martel.Expression.MaxRepeat.features Martel.Expression.ParseRecords.features Martel.Expression.PassThrough.features" class="py-name" href="#" 
onclick="return doclink('link-102', 'features', 'link-31');">features</a></tt><tt class="py-op">)</tt> </tt> <a name="L248"></a><tt class="py-lineno">248</tt> <tt class="py-line"> <tt class="py-name">iters</tt> <tt class="py-op">=</tt> <tt class="py-number">0</tt> </tt> <a name="L249"></a><tt class="py-lineno">249</tt> <tt class="py-line"> <tt class="py-keyword">while</tt> <tt class="py-name">iters</tt> <tt class="py-op">&lt;</tt> <tt id="link-103" class="py-name"><a title="Bio.MaxEntropy.MAX_IIS_ITERATIONS" class="py-name" href="#" onclick="return doclink('link-103', 'MAX_IIS_ITERATIONS', 'link-2');">MAX_IIS_ITERATIONS</a></tt><tt class="py-op">:</tt> </tt> <a name="L250"></a><tt class="py-lineno">250</tt> <tt class="py-line"> <tt class="py-name">nalphas</tt> <tt class="py-op">=</tt> <tt id="link-104" class="py-name" targets="Function Bio.MaxEntropy._train_iis()=Bio.MaxEntropy-module.html#_train_iis"><a title="Bio.MaxEntropy._train_iis" class="py-name" href="#" onclick="return doclink('link-104', '_train_iis', 'link-104');">_train_iis</a></tt><tt class="py-op">(</tt><tt class="py-name">xs</tt><tt class="py-op">,</tt> <tt class="py-name">classes</tt><tt class="py-op">,</tt> <tt id="link-105" class="py-name"><a title="Bio.GFF.Segment.features BioSQL.BioSeq.DBSeqRecord.features Martel.Expression.Expression.features Martel.Expression.ExpressionList.features Martel.Expression.FastFeature.features Martel.Expression.Group.features Martel.Expression.HeaderFooter.features Martel.Expression.MaxRepeat.features Martel.Expression.ParseRecords.features Martel.Expression.PassThrough.features" class="py-name" href="#" onclick="return doclink('link-105', 'features', 'link-31');">features</a></tt><tt class="py-op">,</tt> <tt class="py-name">f_sharp</tt><tt class="py-op">,</tt> </tt> <a name="L251"></a><tt class="py-lineno">251</tt> <tt class="py-line"> <tt class="py-name">alphas</tt><tt class="py-op">,</tt> <tt class="py-name">e_empirical</tt><tt class="py-op">)</tt> </tt> <a 
name="L252"></a><tt class="py-lineno">252</tt> <tt class="py-line"> <tt class="py-name">diff</tt> <tt class="py-op">=</tt> <tt id="link-106" class="py-name"><a title="Bio.GFF.FeatureAggregate.map" class="py-name" href="#" onclick="return doclink('link-106', 'map', 'link-7');">map</a></tt><tt class="py-op">(</tt><tt class="py-keyword">lambda</tt> <tt id="link-107" class="py-name"><a title="Bio.MarkovModel.x Bio.Statistics.lowess.x" class="py-name" href="#" onclick="return doclink('link-107', 'x', 'link-59');">x</a></tt><tt class="py-op">,</tt> <tt class="py-name">y</tt><tt class="py-op">:</tt> <tt class="py-name">math</tt><tt class="py-op">.</tt><tt id="link-108" class="py-name"><a title="Bio.Affy.CelFile.fabs Bio.LogisticRegression.fabs Bio.MarkovModel.fabs Bio.MaxEntropy.fabs Bio.NaiveBayes.fabs Bio.Statistics.lowess.fabs Bio.distance.fabs Bio.kNN.fabs" class="py-name" href="#" onclick="return doclink('link-108', 'fabs', 'link-83');">fabs</a></tt><tt class="py-op">(</tt><tt id="link-109" class="py-name"><a title="Bio.MarkovModel.x Bio.Statistics.lowess.x" class="py-name" href="#" onclick="return doclink('link-109', 'x', 'link-59');">x</a></tt><tt class="py-op">-</tt><tt class="py-name">y</tt><tt class="py-op">)</tt><tt class="py-op">,</tt> <tt class="py-name">alphas</tt><tt class="py-op">,</tt> <tt class="py-name">nalphas</tt><tt class="py-op">)</tt> </tt> <a name="L253"></a><tt class="py-lineno">253</tt> <tt class="py-line"> <tt class="py-name">diff</tt> <tt class="py-op">=</tt> <tt class="py-name">reduce</tt><tt class="py-op">(</tt><tt class="py-keyword">lambda</tt> <tt id="link-110" class="py-name"><a title="Bio.MarkovModel.x Bio.Statistics.lowess.x" class="py-name" href="#" onclick="return doclink('link-110', 'x', 'link-59');">x</a></tt><tt class="py-op">,</tt> <tt class="py-name">y</tt><tt class="py-op">:</tt> <tt id="link-111" class="py-name"><a title="Bio.MarkovModel.x Bio.Statistics.lowess.x" class="py-name" href="#" onclick="return doclink('link-111', 
'x', 'link-59');">x</a></tt><tt class="py-op">+</tt><tt class="py-name">y</tt><tt class="py-op">,</tt> <tt class="py-name">diff</tt><tt class="py-op">,</tt> <tt class="py-number">0</tt><tt class="py-op">)</tt> </tt> <a name="L254"></a><tt class="py-lineno">254</tt> <tt class="py-line"> <tt class="py-name">alphas</tt> <tt class="py-op">=</tt> <tt class="py-name">nalphas</tt> </tt> <a name="L255"></a><tt class="py-lineno">255</tt> <tt class="py-line"> </tt> <a name="L256"></a><tt class="py-lineno">256</tt> <tt class="py-line"> <tt class="py-name">me</tt> <tt class="py-op">=</tt> <tt id="link-112" class="py-name" targets="Module Bio.MaxEntropy=Bio.MaxEntropy-module.html,Class Bio.MaxEntropy.MaxEntropy=Bio.MaxEntropy.MaxEntropy-class.html"><a title="Bio.MaxEntropy Bio.MaxEntropy.MaxEntropy" class="py-name" href="#" onclick="return doclink('link-112', 'MaxEntropy', 'link-112');">MaxEntropy</a></tt><tt class="py-op">(</tt><tt class="py-op">)</tt> </tt> <a name="L257"></a><tt class="py-lineno">257</tt> <tt class="py-line"> <tt class="py-name">me</tt><tt class="py-op">.</tt><tt class="py-name">alphas</tt><tt class="py-op">,</tt> <tt class="py-name">me</tt><tt class="py-op">.</tt><tt class="py-name">classes</tt><tt class="py-op">,</tt> <tt class="py-name">me</tt><tt class="py-op">.</tt><tt class="py-name">feature_fns</tt> <tt class="py-op">=</tt> <tt class="py-name">alphas</tt><tt class="py-op">,</tt> <tt class="py-name">classes</tt><tt class="py-op">,</tt> <tt class="py-name">feature_fns</tt> </tt> <a name="L258"></a><tt class="py-lineno">258</tt> <tt class="py-line"> <tt class="py-keyword">if</tt> <tt class="py-name">update_fn</tt> <tt class="py-keyword">is</tt> <tt class="py-keyword">not</tt> <tt class="py-name">None</tt><tt class="py-op">:</tt> </tt> <a name="L259"></a><tt class="py-lineno">259</tt> <tt class="py-line"> <tt class="py-name">update_fn</tt><tt class="py-op">(</tt><tt class="py-name">me</tt><tt class="py-op">)</tt> </tt> <a name="L260"></a><tt 
class="py-lineno">260</tt> <tt class="py-line"> </tt> <a name="L261"></a><tt class="py-lineno">261</tt> <tt class="py-line"> <tt class="py-keyword">if</tt> <tt class="py-name">diff</tt> <tt class="py-op">&lt;</tt> <tt id="link-113" class="py-name"><a title="Bio.MaxEntropy.IIS_CONVERGE" class="py-name" href="#" onclick="return doclink('link-113', 'IIS_CONVERGE', 'link-3');">IIS_CONVERGE</a></tt><tt class="py-op">:</tt> <tt class="py-comment"># converged</tt> </tt> <a name="L262"></a><tt class="py-lineno">262</tt> <tt class="py-line"> <tt class="py-keyword">break</tt> </tt> <a name="L263"></a><tt class="py-lineno">263</tt> <tt class="py-line"> <tt class="py-keyword">else</tt><tt class="py-op">:</tt> </tt> <a name="L264"></a><tt class="py-lineno">264</tt> <tt class="py-line"> <tt class="py-keyword">raise</tt> <tt class="py-string">"IIS did not converge"</tt> </tt> <a name="L265"></a><tt class="py-lineno">265</tt> <tt class="py-line"> </tt> <a name="L266"></a><tt class="py-lineno">266</tt> <tt class="py-line"> <tt class="py-keyword">return</tt> <tt class="py-name">me</tt> </tt> </div><a name="L267"></a><tt class="py-lineno">267</tt> <tt class="py-line"> </tt><script type="text/javascript"> <!-- expandto(location.href); // --> </script> </pre> <br /> <!-- ==================== NAVIGATION BAR ==================== --> <table class="navbar" border="0" width="100%" cellpadding="0" bgcolor="#a0c0ff" cellspacing="0"> <tr valign="middle"> <!-- Tree link --> <th> <a href="module-tree.html">Trees</a> </th> <!-- Index link --> <th> <a href="identifier-index.html">Indices</a> </th> <!-- Help link --> <th> <a href="help.html">Help</a> </th> <th class="navbar" width="100%"></th> </tr> </table> <table border="0" cellpadding="0" cellspacing="0" width="100%"> <tr> <td align="left" class="footer"> Generated by Epydoc 3.0.1 on Mon Sep 15 09:27:01 2008 </td> <td align="right" class="footer"> <a target="mainFrame" href="http://epydoc.sourceforge.net" >http://epydoc.sourceforge.net</a> </td> 
</tr> </table> <script type="text/javascript"> <!-- // Private objects are initially displayed (because if // javascript is turned off then we want them to be // visible); but by default, we want to hide them. So hide // them unless we have a cookie that says to show them. checkCookie(); // --> </script> </body> </html>