shiba
 All Data Structures Files Functions Variables Macros Pages
newick.c
Go to the documentation of this file.
1 
5 #include "shiba.h"
6 
7 phylo parseNewick(char *in) {
8 
9  phylo p;
10  int lbrack = 0;
11  int rbrack = 0;
12  int comma = 0;
13 
14  // dimension the phylo struct
15  for (int i = 0; i < strlen(in); i++)
16  {
17  // printf("%c", in[i]);
18  if (in[i] == 40) lbrack++;
19  else if (in[i] == 41) rbrack++;
20  else if (in[i] == 44) comma++;
21  }
22  if (lbrack != rbrack) error("Imbalanced parentheses in phylogeny file");
23 
24  p.nnodes = lbrack + comma + 1;
25  p.parent = mem1d_i(p.nnodes);
26  p.ndaughter = mem1d_i(p.nnodes);
27  p.depth = mem1d_i(p.nnodes);
28  p.bl = mem1d_d(p.nnodes);
29  p.taxon = mem2d1_c(p.nnodes);
30  p.age = mem1d_d(p.nnodes);
31 
32  // Move through the newick format tree character by character
33  int i = 0;
34  // start outside parentheses, one hypothetical node proximal to root node
35  int node = -1;
36  int parent = -2;
37  int nodeCount = 0;
38  int tmpn;
39 
40  while ((i < strlen(in)) && (in[i] != 59))
41  {
42  // descend a branch, making new node for the parenthesis descended thru
43  if (in[i] == 40) // "("
44  {
45  parent = node;
46  // make a new node, set node to index of final row
47  nodeCount++;
48  node = nodeCount-1;
49  p.parent[node] = parent;
50  // first run case, since root has no parent data
51  if (node == 0) {
52  p.depth[node] = 0;
53  } else {
54  p.depth[node] = p.depth[parent] + 1;
55  p.ndaughter[parent]++;
56  }
57  /* printf("%c DOWN %d(%d)\n", in[i], node, parent);
58  printf(" new node %d, parent %d, depth %d\n",
59  node, p.parent[node], p.depth[node]); */
60  i++;
61  }
62 
63  // sibling taxon, ignore
64  else if (in[i] == 44) // ","
65  {
66  node = parent;
67  parent = p.parent[node];
68  // printf("%c ACROSS %d(%d)\n", in[i], node, parent);
69  i++;
70  }
71 
72  // back up a node to parent, keep track of locn with node
73  else if (in[i] == 41) // ")"
74  {
75  node = parent;
76  parent = p.parent[node];
77  // printf("%c UP %d(%d)\n", in[i], node, parent);
78  i++;
79  }
80 
81  else
82  {
83  // Now, check for new taxa, or new interior name
84 
86 
87  // Is there nodename here? I.e., not `:' or `['
88  if ((in[i] != 58) && (in[i] != 91))
89  {
90  // Is it a new terminal? I.e., not following a `)'
91  if (in[i-1] != 41)
92  {
93  // creat new node
94  parent = node;
95  nodeCount++;
96  node = nodeCount-1;
97  p.parent[node] = parent;
98  p.depth[node] = p.depth[parent] + 1;
99  p.ndaughter[parent]++;
100 
101  /* printf("%c TERM %d(%d)\n", in[i], node, parent);
102  printf(" new node %d, parent %d, depth %d\n", node,
103  p.parent[node], p.depth[node]); */
104  }
105 
106  char *tmp = NULL; tmpn = 0;
107  // check for name, I.e. not `:' nor `['
108  while ((in[i] != 58) && (in[i] != 91) &&
109  (in[i] != 44) && (in[i] != 41) && (in[i] != 40) &&
110  (in[i] != 59))
111  {
112  if (!tmpn) {
113  asprintf(&tmp, "%c", in[i]); tmpn++;
114  } else {
115  char *tmp2 = tmp;
116  asprintf(&tmp, "%s%c", tmp2, in[i]); tmpn++;
117  free(tmp2);
118  }
119  i++;
120  }
121  // was there any taxon name?
122  asprintf(&p.taxon[node], "%s", tmp);
123  // printf(" taxon name %s\n", tmp);
124  free(tmp);
125  }
126 
127  // are there bls?
128  if (in[i] == 58)
129  {
130  i++; // skip over delimiter `:'
131  char *tmp = NULL; tmpn = 0;
132  // bead bl string, I.e. not `['
133  while ((in[i] != 91) &&
134  (in[i] != 44) && (in[i] != 41) && (in[i] != 40) &&
135  (in[i] != 59)) // watch out for final `;'
136  {
137  if (!tmpn) {
138  asprintf(&tmp, "%c", in[i]); tmpn++;
139  } else {
140  Sasprintf(tmp, "%s%c", tmp, in[i]); tmpn++;
141  }
142  i++;
143  }
144  p.bl[node] = atof(tmp);
145  // printf(" taxon bl %f\n", p.bl[node]);
146  free(tmp);
147  }
148 
149  // are there notes?
150  if (in[i] == 91)
151  // Discard note
152  while ((in[i] != 44) && (in[i] != 41) && (in[i] != 40) &&
153  (in[i] != 59))
154  i++;
155  }
156  }
157 
158  /* printf("%s\n", in);
159  for (int n = 0; n < p.nnodes; n++)
160  printf("%3d %3d %3d %3d %5.2f %s\n", n, p.parent[n], p.depth[n],
161  p.ndaughter[n], p.bl[n], p.taxon[n]); */
162 
163 
164  return p;
165 }
166 
200 {
201 
202  // Check first for ultrametic tree
203  double ageToRootStem = 0;
204  double tmp;
205  int node;
206 
207  Lineages = p.nnodes;
208  for (int i = 0; i < p.nnodes; i++)
209  if (!p.ndaughter[i]) {
210  tmp = 0.0;
211  node = i;
212  while (p.parent[node] != -1) {
213  p.age[node] = tmp;
214  tmp += p.bl[node];
215  node = p.parent[node];
216  }
217  if (!ageToRootStem) ageToRootStem = tmp;
218  else if (tmp != ageToRootStem) error("tree is not ultrametric");
219  }
220  //add stem
221  p.age[0] = ageToRootStem;
222  ageToRootStem += p.bl[0];
223 
224  // Is the tree stem longer than the time slices?
225  if (ageToRootStem < RealTime[0])
226  error("tree stem not old enough for geo age");
227 
228  // Slice the phylo into periods defined by geo
229 
231 
232  double periodOld, periodYng, edgeOld, edgeYng;
233  for (int i = 0; i < p.nnodes; i++)
234  {
235  for (int j = 0; j < Times; j++)
236  {
237  // expressed in ages
238  periodOld = RealTime[j];
239  periodYng = (j < Times-1) ? RealTime[j+1] : 0.0 ;
240  edgeOld = p.age[i] + p.bl[i];
241  edgeYng = p.age[i];
242 
243  // Rules:
244  // if the edge passes through the period period beginning,
245  // and either spans the period of end in the period
246  if ((edgeOld >= periodOld) &&
247  ((edgeYng <= periodYng) ||
248  ((edgeYng >= periodYng) && (edgeYng < periodOld))))
249  LineagePeriod[i][j] = 1;
250  /* printf(
251  "phy%02d %4.1f -- %4.1f slc%02d %4.1f -- %4.1f ---> [%d]\n",
252  i, edgeOld, edgeYng, j, periodOld, periodYng,
253  LineagePeriod[i][j]); */
254  }
255  }
256 
257  // create a parent lineage to daughter lineage array. Needed because some
258  // edges have dissappeared if both beginning and ending within a period.
259 
262 
263  for (int i = 0; i < Lineages; i++)
264  for (int t = Times - 1; t > 0; t--) // note stepping until one before stem
265  // if there is a LP at time t
266  if (LineagePeriod[i][t])
267  // is there a LP at time t-1?
268  if (!LineagePeriod[i][t-1]) {
269  // if not, check who is the parent:
270  int node = p.parent[i];
271  while(!LineagePeriod[node][t-1]) node = p.parent[node];
272  /* printf("lin%2d time%2d is daughter of lin%2d time%2d\n",
273  i, t, node, t-1); */
274  LineageDaughtersN[node] += 1;
275  LineageDaughters[node] = (int *) realloc( LineageDaughters[node],
276  LineageDaughtersN[node] * sizeof(int));
277  LineageDaughters[node][LineageDaughtersN[node]-1]=i;
278  }
279 
280  // Convert Extant (by taxa) to LineageExtant (by lineage)
281  LineageExtant = mem3d_i(Lineages, Times, Spaces);
282  int i;
283  for (int x = 0; x < Taxa; x++) {
284  for (i = 0; i < p.nnodes; i++)
285  if (p.taxon[i])
286  if (!strcmp(Taxon[x], p.taxon[i])) break;
287  for (int t = 0; t < Times; t++)
288  for (int s = 0; s < Spaces; s++)
289  if (Extant[x][t][s])
290  LineageExtant[i][t][s] = 1;
291  }
292 }