shiba
 All Data Structures Files Functions Variables Macros Pages
Input File Format

Because of the complexity of the data required as input, I chose XML as the most appropriate input file format. For small cases, it is probably easiest to write the XML ‘by hand’ (i.e., in an XML-aware text editor, e.g. emacs with nXML). For larger datasets, you can generate the repetitive lines with a script, or even with concatenation functions in a spreadsheet. Note that the XML id attributes identifying the spaces, times, etc. can be any valid XML name (the schema types them as xsd:ID, so they must start with a letter or underscore and contain no spaces), and so can actually be the same as the names of the taxa, spaces, etc., whenever those names satisfy that rule.

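While it may take a bit of work to get the data into this format, the great benefit of XML is that it can be validated against a schema. I suggest trang (to convert the compact schema below into RELAX NG XML syntax) and jing (to validate the input file against the converted schema):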

  $ java -jar trang.jar shibaInput.rnc shibaInput.rng
  $ java -jar jing.jar shibaInput.rng shibaInput.xml

Example XML file (shibaInput.xml)

<?xml version="1.0" encoding="UTF-8"?>
<shibaInput>
<!--
Example input:                       Inter-space
                                      distance
Distrib:  A   BC  BC    A   BC  BC    <~~~~~~~>
       : Sp1 Sp2 Sp3   Sp1 Sp2 Sp3
   0 ..:..|...|...|.....|...|...|..   A      BC
       :  |   |   |     |   |   |
  10 ..:..|...|...|.....|...+~+~+..   A     B C
       :  |   |   |     |     |
  20 ..:..|...|...|.....+~~+~~*....   A    B  C
       :  |   |   |        |
  30 ..:..|...+~+~+........|.......   A  B    C
       :  |     |          |
  40 ..:..+~~+~~*..........|.......   A B     C
       :     |             |
  50 ..:.....|.............|.......   AB      C
          phylo1        phylo2
Result:
         * in ABC     * only in BC
-->
<!-- Time periods. Each periodStart id (t1..t6) is the key that the time="..."
     attributes of area, dist and extant refer to. In this example n equals
     the number of periodStart entries, listed from 50 down to 0. -->
<time n="6">
  <periodStart id="t1">50</periodStart>
  <periodStart id="t2">40</periodStart>
  <periodStart id="t3">30</periodStart>
  <periodStart id="t4">20</periodStart>
  <periodStart id="t5">10</periodStart>
  <periodStart id="t6">0</periodStart>
</time>
<!-- Spaces. Each spaceName id (s1..s3) is the key used by the space="...",
     from="..." and to="..." attributes elsewhere in this file; the element
     text is the display name. In this example n equals the number of entries. -->
<space n="3">
  <spaceName id="s1">AreaA</spaceName>
  <spaceName id="s2">AreaB</spaceName>
  <spaceName id="s3">AreaC</spaceName>
</space>
<!-- Area of each space in each time period: one entry per (time, space)
     pair, i.e. 6 periods x 3 spaces = 18 entries. Values are integers. -->
<areas>
  <area time="t1" space="s1">10</area>
  <area time="t1" space="s2">4</area>
  <area time="t1" space="s3">4</area>
  <area time="t2" space="s1">10</area>
  <area time="t2" space="s2">4</area>
  <area time="t2" space="s3">4</area>
  <area time="t3" space="s1">10</area>
  <area time="t3" space="s2">4</area>
  <area time="t3" space="s3">4</area>
  <area time="t4" space="s1">10</area>
  <area time="t4" space="s2">4</area>
  <area time="t4" space="s3">4</area>
  <area time="t5" space="s1">10</area>
  <area time="t5" space="s2">4</area>
  <area time="t5" space="s3">4</area>
  <area time="t6" space="s1">10</area>
  <area time="t6" space="s2">4</area>
  <area time="t6" space="s3">4</area>
</areas>
<!-- Distance between spaces in each time period. Every unordered pair of
     spaces (s1-s2, s1-s3, s2-s3) is listed once per period, in one
     direction only. Values are integers. -->
<distances>
  <dist time="t1" from="s1" to="s2">0</dist>
  <dist time="t1" from="s1" to="s3">2000</dist>
  <dist time="t1" from="s2" to="s3">2000</dist>
  <dist time="t2" from="s1" to="s2">200</dist>
  <dist time="t2" from="s1" to="s3">2000</dist>
  <dist time="t2" from="s2" to="s3">1800</dist>
  <dist time="t3" from="s1" to="s2">400</dist>
  <dist time="t3" from="s1" to="s3">2000</dist>
  <dist time="t3" from="s2" to="s3">1600</dist>
  <dist time="t4" from="s1" to="s2">1600</dist>
  <dist time="t4" from="s1" to="s3">2000</dist>
  <dist time="t4" from="s2" to="s3">400</dist>
  <dist time="t5" from="s1" to="s2">1800</dist>
  <dist time="t5" from="s1" to="s3">2000</dist>
  <dist time="t5" from="s2" to="s3">200</dist>
  <dist time="t6" from="s1" to="s2">2000</dist>
  <dist time="t6" from="s1" to="s3">2000</dist>
  <dist time="t6" from="s2" to="s3">0</dist>
</distances>
<!-- Taxa. The id (x1..x3) is what taxon="..." attributes refer to; the
     element text (Sp1..Sp3) is the name that appears as a tip label in the
     Newick strings of this example. -->
<taxa n="3">
  <taxon id="x1">Sp1</taxon>
  <taxon id="x2">Sp2</taxon>
  <taxon id="x3">Sp3</taxon>
</taxa>
<!-- Phylogenies as Newick strings with branch lengths. Tip labels are the
     taxon names (Sp1..Sp3), not the taxon ids. In both example trees the
     branch lengths from root stem to tip sum to 50, the oldest periodStart. -->
<phylo n="2">
  <newick id="p1">(Sp1:40,(Sp2:30,Sp3:30):10):10;</newick>
  <newick id="p2">(Sp1:20,(Sp2:10,Sp3:10):10):30;</newick>
</phylo>
<!-- Occurrence records: one extant element per (taxon, time, space)
     combination in which the taxon is present. All records in this example
     are at t6 (time 0). -->
<extantIn> <!-- included fossils -->
  <extant taxon="x1" time="t6" space="s1"/>
  <extant taxon="x2" time="t6" space="s2"/>
  <extant taxon="x2" time="t6" space="s3"/>
  <extant taxon="x3" time="t6" space="s2"/>
  <extant taxon="x3" time="t6" space="s3"/>
</extantIn>
<!-- Run settings. The schema requires these children inside config in
     exactly this order. Each allowed element names, by space id, one of the
     spaces listed in the space section. -->
<config>
  <nStartSpaces>1</nStartSpaces>
  <startSpaces>
    <allowed space="s1"/>
    <allowed space="s2"/>
    <allowed space="s3"/>
  </startSpaces>
  <maxRuns>10000000</maxRuns>
  <stopAfterSuccess>100</stopAfterSuccess>
  <probSurvA>1.0</probSurvA>
  <probSurvB>2.0</probSurvB>
  <probDispA>1.0</probDispA>
  <probDispB>2.0</probDispB>
</config>
</shibaInput>

Relax NG Compact schema (shibaInput.rnc)

# RELAX NG Compact schema for shiba input files (shibaInput.xml).
# Written as one named pattern per section of the input file.
# The xsd: datatype prefix is predeclared in the compact syntax.

default namespace = ""

start = shibaInput

# Root element: all eight sections are required, in exactly this order.
shibaInput =
  element shibaInput {
    timePeriods,
    spaces,
    areas,
    distances,
    taxa,
    phylogenies,
    occurrences,
    config
  }

# Time periods; each periodStart carries a document-unique id and a float value.
timePeriods =
  element time {
    attribute n { xsd:integer },
    element periodStart {
      attribute id { xsd:ID },
      xsd:float
    }+
  }

# Spaces; the id is the reference key, the text content is a free-form name.
spaces =
  element space {
    attribute n { xsd:integer },
    element spaceName {
      attribute id { xsd:ID },
      text
    }+
  }

# Integer area value for a (space, time) pair, both given as id references.
areas =
  element areas {
    element area {
      attribute space { xsd:IDREF },
      attribute time { xsd:IDREF },
      xsd:integer
    }+
  }

# Integer distance between two spaces (from, to) at a given time.
distances =
  element distances {
    element dist {
      attribute from { xsd:IDREF },
      attribute time { xsd:IDREF },
      attribute to { xsd:IDREF },
      xsd:integer
    }+
  }

# Taxa; the taxon name must be an NCName (no spaces, no leading digit).
taxa =
  element taxa {
    attribute n { xsd:integer },
    element taxon {
      attribute id { xsd:ID },
      xsd:NCName
    }+
  }

# Phylogenies; the Newick string is unconstrained text.
phylogenies =
  element phylo {
    attribute n { xsd:integer },
    element newick {
      attribute id { xsd:ID },
      text
    }+
  }

# Occurrence records: empty elements that reference a space, taxon and time.
occurrences =
  element extantIn {
    element extant {
      attribute space { xsd:IDREF },
      attribute taxon { xsd:IDREF },
      attribute time { xsd:IDREF }
    }+
  }

# Run settings; every child is required, in exactly this order.
config =
  element config {
    element nStartSpaces { xsd:integer },
    element startSpaces {
      element allowed {
        attribute space { xsd:IDREF }
      }+
    },
    element maxRuns { xsd:integer },
    element stopAfterSuccess { xsd:integer },
    element probSurvA { xsd:decimal },
    element probSurvB { xsd:decimal },
    element probDispA { xsd:decimal },
    element probDispB { xsd:decimal }
  }