001 /**
002 * File : Ieee2Bibtex.java
003 * Project : Ieee2Bibtex
004 * Copyright : Copyright (c) 2006
005 * Company : Mobile Department - Eurecom Institute
006 * Creation : 7:36:24 AM - Oct 30, 2007
007 */
008 import java.io.BufferedReader;
009 import java.io.IOException;
010 import java.io.InputStreamReader;
011 import java.net.URL;
012 import java.net.URLConnection;
013 import java.util.ArrayList;
014 import java.util.HashSet;
015 import java.util.StringTokenizer;
016 import java.util.Vector;
018 import java.applet.Applet;
019 import java.awt.*;
/**
 * Applet that turns IEEE Xplore article pages into BibTeX entries.
 *
 * <p>The user pastes one URL per line into the upper text area and presses
 * the button; each accepted page is downloaded, scraped line by line for
 * title, authors, publication, date and volume information, and the
 * resulting {@code @inproceedings} entry is appended to the lower text area.
 *
 * <p>The scraping relies on fixed HTML markers and is best-effort only: a
 * page whose layout differs is reported as an error for that URL.
 *
 * @author daniel.camara@eurecom.fr
 * @version 1.0
 */
public class Ieee2Bibtex extends Applet {

    private static final long serialVersionUID = 1L;

    /** URL prefixes a line must start with to be processed. */
    private static final String[] ACCEPTED_URL_PREFIXES = {
        "http://ieeexplore.ieee.org/",
        "https://ieeexplore.ieee.org/"
    };

    /** Line that precedes the article title. */
    private static final String TITLE_MARKER = "<td><p><span class=\"headNavBlueXLarge2\">";

    /** Line that precedes the author list. */
    private static final String AUTHORS_START_MARKER = "<span class=\"bodyCopyBlackLargeSpaced\">";

    /** Line that terminates the author list. */
    private static final String AUTHORS_END_MARKER = "<p class=\"bodyCopyBlackLargeSpaced\">";

    /** Text found on the line that names the publication. */
    private static final String PUBLICATION_MARKER = "This paper";

    /** Markup-only lines inside the author list that are not author names. */
    private static final HashSet<String> IGNORED_AUTHOR_LINES = new HashSet<String>();

    static {
        IGNORED_AUTHOR_LINES.add("<br>");
        IGNORED_AUTHOR_LINES.add("</span>");
        IGNORED_AUTHOR_LINES.add("</p>");
    }

    private TextArea searchField;
    private TextArea responseField;
    protected Button submitButton;
    protected String baseURL, serviceName, frame = "Results";

    /**
     * Initializes the applet and builds its interface: an input area with the
     * submit button on top and the output area filling the remaining space.
     */
    @Override
    public void init() {
        searchField = new TextArea(5, 60);
        responseField = new TextArea(10, 70);

        ScrollPane searchScroll = new ScrollPane(ScrollPane.SCROLLBARS_AS_NEEDED);
        searchScroll.add(searchField);
        submitButton = new Button("Convert URLS into bibtex Entries");

        // The layout is passed to the constructor; no second setLayout is needed.
        Panel inputPanel = new Panel(new BorderLayout());
        inputPanel.add(new Label("Search String: "), BorderLayout.WEST);
        inputPanel.add(searchScroll, BorderLayout.CENTER);
        inputPanel.add(submitButton, BorderLayout.SOUTH);

        setLayout(new BorderLayout());
        add(inputPanel, BorderLayout.NORTH);

        ScrollPane responseScroll = new ScrollPane(ScrollPane.SCROLLBARS_AS_NEEDED);
        responseScroll.add(responseField);
        add(responseScroll, BorderLayout.CENTER);
    }

    /**
     * Starts the conversion when the submit button is activated.
     *
     * <p>Events whose target is not the submit button are passed on to the
     * superclass instead of triggering a conversion.
     *
     * @param event  the action event
     * @param object the event argument (unused)
     * @return {@code true} if the event was the button click and was handled,
     *         otherwise whatever the superclass returns
     */
    @Override
    public boolean action(Event event, Object object) {
        if (event != null && event.target == submitButton) {
            grabFormat(searchField.getText());
            return true;
        }
        return super.action(event, object);
    }

    /**
     * Converts every accepted URL in {@code searchString} into a BibTeX entry
     * and writes the results to the response area.
     *
     * <p>Steps: filter the input lines, download each page, scrape it, clean
     * the scraped text and print the formatted entry. A failure on one URL is
     * reported in the output and does not stop the remaining URLs.
     *
     * <p>NOTE(review): the download runs on the calling thread; when invoked
     * from {@link #action(Event, Object)} that is presumably the AWT event
     * thread, so the interface may be unresponsive while pages load.
     *
     * @param searchString newline-separated list of page URLs; may be {@code null}
     */
    public void grabFormat(String searchString) {
        String[] urls = cleanReferences(searchString);
        responseField.setText("% -> " + urls.length + "\n");

        for (int j = 0; j < urls.length; j++) {
            responseField.append("% " + (j + 1) + "---\n");
            try {
                String entry = fetchEntry(urls[j]);
                if (entry == null) {
                    responseField.append("% No entry found in " + urls[j] + "\n");
                } else {
                    responseField.append(entry);
                    responseField.append("%\n");
                }
            } catch (Exception e) {
                // Broad catch on purpose: this is the UI boundary, and both I/O
                // failures and unexpected markup must be shown to the user
                // without aborting the URLs that follow.
                responseField.append("Error handling the url " + urls[j] + " : " + e + "\n");
            }
        }
        responseField.append("\n% --- Done ---");
    }

    /**
     * Downloads one page and scrapes it, always closing the connection's reader.
     *
     * @param address the page URL
     * @return the cleaned BibTeX entry, or {@code null} if the page holds none
     * @throws IOException if the page cannot be read or ends unexpectedly
     */
    private static String fetchEntry(String address) throws IOException {
        URLConnection connection = new URL(address).openConnection();
        BufferedReader in = new BufferedReader(
                new InputStreamReader(connection.getInputStream()));
        try {
            return parseEntry(in);
        } finally {
            in.close();
        }
    }

    /**
     * Scans the page for the title marker and, if present, scrapes the entry
     * that follows it.
     *
     * @param in reader positioned at the start of the page; not closed here
     * @return the cleaned BibTeX entry, or {@code null} if the marker is absent
     * @throws IOException on read failure or if the page ends inside the entry
     */
    private static String parseEntry(BufferedReader in) throws IOException {
        String line;
        while ((line = in.readLine()) != null) {
            if (line.contains(TITLE_MARKER)) {
                return cleanOutPut(readEntry(in));
            }
        }
        return null;
    }

    /**
     * Reads the fields that follow the title marker and formats the raw
     * (not yet cleaned) BibTeX entry.
     *
     * @param in reader positioned just after the title marker line
     * @return the raw entry text
     * @throws IOException on read failure, premature end of page, or markup
     *                     that does not have the expected shape
     */
    private static String readEntry(BufferedReader in) throws IOException {
        // Title: first non-blank line, cut at the first tag if there is one.
        String titleLine = nextNonBlankLine(in, "the title").trim();
        int tagStart = titleLine.indexOf('<');
        String title = tagStart >= 0 ? titleLine.substring(0, tagStart) : titleLine;

        // Authors: every line between the two markers, either the text of a
        // link or a plain non-markup line.
        skipToLineContaining(in, AUTHORS_START_MARKER);
        String line = nextNonBlankLine(in, "the author list");
        ArrayList<String> authors = new ArrayList<String>();
        do {
            String trimmed = line.trim();
            if (line.contains("<a href")) {
                authors.add(slice(line, line.indexOf("\">") + 2, line.indexOf("</a"),
                        "an author name"));
            } else if (trimmed.length() > 0 && !IGNORED_AUTHOR_LINES.contains(trimmed)) {
                authors.add(trimmed);
            }
            line = readRequiredLine(in, "the end of the author list");
        } while (!line.contains(AUTHORS_END_MARKER));

        // Publication name sits between <strong> tags on the "This paper" line.
        line = skipToLineContaining(in, PUBLICATION_MARKER);
        String magazine = slice(line, line.indexOf("<strong>") + 8,
                line.indexOf("</strong>"), "the publication name");

        line = nextNonBlankLine(in, "the publication date");
        String date = slice(line, line.indexOf("Publication Date:"),
                line.indexOf("<br>"), "the publication date");

        // Volume information spans several lines up to the ISSN/ISBN/posting line.
        StringBuilder volume = new StringBuilder(
                nextNonBlankLine(in, "the volume information").trim());
        line = nextNonBlankLine(in, "the volume information");
        while (!line.contains("ISSN") && !line.contains("ISBN")
                && !line.contains("Posted online")) {
            if (line.contains("</a")) {
                // The leading '>' is kept on purpose: cleanOutPut matches ">Issue: ".
                volume.append(slice(line, line.indexOf(">"), line.indexOf("</a"),
                        "the volume information"));
            } else if (line.trim().length() > 0) {
                volume.append(line.trim());
            }
            line = readRequiredLine(in, "the end of the volume information");
        }

        // Citation key: first author followed by the last four characters of the date.
        String firstAuthor = authors.isEmpty() ? "unknown" : authors.get(0);
        String yearSuffix = date.substring(Math.max(0, date.length() - 4));

        StringBuilder entry = new StringBuilder();
        entry.append("@inproceedings{ ").append(firstAuthor).append(yearSuffix).append(",\n");
        entry.append(" author = { ");
        for (int i = 0; i < authors.size(); i++) {
            entry.append(authors.get(i)).append(" ");
        }
        entry.append("},\n");
        entry.append(" title = {").append(title).append("},\n");
        entry.append(" booktitle = { ").append(invertBookTitle(magazine)).append("},\n");
        entry.append(" year = {").append(date).append("},\n");
        entry.append(" volume = {").append(volume).append("},\n");
        entry.append("}\n");
        return entry.toString();
    }

    /**
     * Reads one line and fails with a descriptive error at end of input.
     *
     * @param in       the page reader
     * @param expected description of what was being looked for, used in the error
     * @return the line read, never {@code null}
     * @throws IOException on read failure or end of input
     */
    private static String readRequiredLine(BufferedReader in, String expected)
            throws IOException {
        String line = in.readLine();
        if (line == null) {
            throw new IOException("Unexpected end of page while reading " + expected);
        }
        return line;
    }

    /** Returns the next line that is not blank, failing at end of input. */
    private static String nextNonBlankLine(BufferedReader in, String expected)
            throws IOException {
        String line;
        do {
            line = readRequiredLine(in, expected);
        } while (line.trim().length() == 0);
        return line;
    }

    /** Skips lines until one contains {@code marker} and returns that line. */
    private static String skipToLineContaining(BufferedReader in, String marker)
            throws IOException {
        String line;
        do {
            line = readRequiredLine(in, "the marker " + marker);
        } while (!line.contains(marker));
        return line;
    }

    /**
     * Bounds-checked {@code substring} that reports unexpected markup with a
     * readable message instead of an index error.
     */
    private static String slice(String line, int begin, int end, String what)
            throws IOException {
        if (begin < 0 || end < begin || end > line.length()) {
            throw new IOException("Unexpected page layout: could not extract " + what);
        }
        return line.substring(begin, end);
    }

    /**
     * Splits the input into lines and keeps only the IEEE Xplore URLs.
     *
     * <p>Empty lines and lines that do not start with one of the accepted
     * {@code http://} or {@code https://} IEEE Xplore prefixes are dropped.
     * Surrounding whitespace is removed from the returned URLs.
     *
     * @param pagesToGetReferences newline-separated URLs; may be {@code null}
     * @return the accepted URLs in input order, never {@code null}
     */
    private static String[] cleanReferences(String pagesToGetReferences) {
        ArrayList<String> accepted = new ArrayList<String>();
        if (pagesToGetReferences != null) {
            StringTokenizer lines = new StringTokenizer(pagesToGetReferences, "\r\n");
            while (lines.hasMoreTokens()) {
                String candidate = lines.nextToken().trim();
                if (isIeeeXploreUrl(candidate)) {
                    accepted.add(candidate);
                }
            }
        }
        return accepted.toArray(new String[accepted.size()]);
    }

    /** Tells whether {@code candidate} starts with an accepted URL prefix. */
    private static boolean isIeeeXploreUrl(String candidate) {
        for (int i = 0; i < ACCEPTED_URL_PREFIXES.length; i++) {
            if (candidate.startsWith(ACCEPTED_URL_PREFIXES[i])) {
                return true;
            }
        }
        return false;
    }

    /**
     * Tries to put a publication name of the form {@code "X, Y"} into the
     * order {@code "Y X"}, splitting at the first comma.
     *
     * <p>Both parts are trimmed so the result has no stray leading blank. A
     * name without a comma, or with the comma as its first character, is
     * returned unchanged.
     *
     * @param magazine the publication name as scraped; may be {@code null}
     * @return the reordered name, or the input when no inversion applies
     */
    private static String invertBookTitle(String magazine) {
        if (magazine == null) {
            return null;
        }
        int inversionIndex = magazine.indexOf(",");
        if (inversionIndex <= 0) {
            return magazine;
        }
        String head = magazine.substring(0, inversionIndex).trim();
        String tail = magazine.substring(inversionIndex + 1).trim();
        if (tail.length() == 0) {
            return head;
        }
        return tail + " " + head;
    }

    /**
     * Removes some common leftovers of the HTML page from a BibTeX entry.
     *
     * <p>Not meant to be perfect, only to help with the most common cases.
     * The replacements are literal (no regular expressions) and their order
     * matters: the specific {@code <br>...} labels must be handled before
     * the generic {@code <br>}.
     *
     * @param outputString the raw BibTeX entry
     * @return the cleaned entry
     */
    private static String cleanOutPut(String outputString) {
        // Non-breaking spaces, as entity or as character, become plain spaces
        // so the label patterns below can match.
        String cleaned = outputString.replace("&nbsp;", " ").replace('\u00A0', ' ');
        cleaned = cleaned.replace("<br>Location: ", " ");
        cleaned = cleaned.replace("<br>Meeting Date: ", " ");
        cleaned = cleaned.replace("{Publication Date: ", "{ ");
        cleaned = cleaned.replace("</span>", "");
        cleaned = cleaned.replace("On page(s): ", " p. ");
        cleaned = cleaned.replace(">Issue: ", " I. ");
        cleaned = cleaned.replace("Volume: ", " V. ");
        cleaned = cleaned.replace("</p>", " ");
        cleaned = cleaned.replace("<br>", " ");
        return cleaned;
    }
}