Blogger - 自分のブログの全ての記事の取得 その2
2008/04/05
blogger
java
Bloggerから全ての記事を取得します。
最終的には、取得したデータから記事ごとに付けられているタグ(ラベル)をベクトル化したいので、今回はその準備です。
具体的には、こんな感じの表を作りたいわけです。
※PDFをキャプチャしたものです。出典:Information Retrieval
Information Retrieval - 索引語の自動抽出
ただし、タグ(ラベル)なので出現頻度数は1か0のどちらかとなります。
とりあえず、データを使うための準備。
Google Data API(GData)のJavaクライアントライブラリを使っています。
package mygg;
import java.io.IOException;
import java.net.URL;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import com.google.gdata.client.GoogleService;
import com.google.gdata.client.Query;
import com.google.gdata.data.Category;
import com.google.gdata.data.DateTime;
import com.google.gdata.data.Entry;
import com.google.gdata.data.Feed;
import com.google.gdata.util.AuthenticationException;
import com.google.gdata.util.ServiceException;
/**
 * Static helpers that download every post of a Blogger blog through the
 * Google Data (GData) Java client and print the posts together with the
 * terms of their categories (labels).
 */
public class Blogger {

    /**
     * Creates a {@link GoogleService} for the service name "blogger" and sets
     * the given credentials on it.
     *
     * @param user     user name handed to {@code setUserCredentials}
     * @param password password handed to {@code setUserCredentials}
     * @return the service with the credentials set
     * @throws AuthenticationException propagated from {@code setUserCredentials}
     */
    public static GoogleService createGoogleService(String user, String password)
            throws AuthenticationException {
        GoogleService service = new GoogleService("blogger", "deiji-blogger-app");
        service.setUserCredentials(user, password);
        return service;
    }

    /**
     * Fetches all entries of the blog's {@code posts/full} feed, page by page.
     *
     * <p>Each request starts at the index following the entries collected so
     * far; paging stops at the first response that contains no entries.
     *
     * @param service service used to run the queries
     * @param blogId  blog identifier inserted into the feed URL
     * @return all entries, in the order the feed returned them
     * @throws IOException      if the feed URL is malformed or a query fails with an I/O error
     * @throws ServiceException propagated from {@code service.query}
     */
    public static List<Entry> allEntry(GoogleService service, String blogId)
            throws IOException, ServiceException {
        // NOTE(review): the feed is requested over plain http although the
        // service carries credentials - confirm whether https should be used.
        URL feedUrl = new URL("http://www.blogger.com/feeds/" + blogId
                + "/posts/full");
        Query query = new Query(feedUrl);

        // The update-time window is identical for every page, so it is set
        // once instead of on every loop iteration.
        query.setUpdatedMin(DateTime.parseDateTime("1999-12-31T23:59:59"));
        query.setUpdatedMax(DateTime.now());

        List<Entry> returnList = new ArrayList<Entry>();
        int index = 1; // the first requested start index is 1, not 0
        while (true) {
            query.setStartIndex(index);
            Feed resultFeed = service.query(query, Feed.class);
            List<Entry> entries = resultFeed.getEntries();
            if (entries.isEmpty()) {
                break;
            }
            index += entries.size();
            returnList.addAll(entries);
        }
        return returnList;
    }

    /**
     * Prints one tab-separated line per post: published time, id, plain-text
     * title, then the term of each category. A blank line follows the list.
     *
     * @param myService service used to fetch the posts
     * @param blogId    blog identifier
     * @throws ServiceException propagated from {@link #allEntry}
     * @throws IOException      propagated from {@link #allEntry}
     */
    public static void printAllPosts(GoogleService myService, String blogId)
            throws ServiceException, IOException {
        List<Entry> list = allEntry(myService, blogId);
        // order older: the fetched order is reversed (presumably the feed
        // returns the newest post first - confirm)
        Collections.reverse(list);
        final String TAB = "\t";
        for (Entry entry : list) {
            System.out.print(entry.getPublished() + TAB + entry.getId() + TAB
                    + entry.getTitle().getPlainText());
            for (Category category : entry.getCategories()) {
                System.out.print(TAB + category.getTerm());
            }
            System.out.println();
        }
        System.out.println();
    }

    /**
     * Prints one line per post: the plain-text title, a tab, and the post's
     * term map as built by {@link Support#allTerm_b(List)}.
     *
     * <p>Lines are printed in the order of the reversed list. Iterating the
     * {@code HashMap} returned by {@code allTerm_b} directly would discard
     * that order, so each entry is looked up in the map instead.
     *
     * @param myService service used to fetch the posts
     * @param blogId    blog identifier
     * @throws ServiceException propagated from {@link #allEntry}
     * @throws IOException      propagated from {@link #allEntry}
     */
    public static void printTermOfAllPosts(GoogleService myService,
            String blogId) throws ServiceException, IOException {
        List<Entry> list = allEntry(myService, blogId);
        // order older
        Collections.reverse(list);
        final String TAB = "\t";
        Map<Entry, Map<String, Integer>> map = Support.allTerm_b(list);
        for (Entry entry : list) {
            System.out.println(entry.getTitle().getPlainText() + TAB
                    + map.get(entry));
        }
    }

    /** Helpers that turn the categories of entries into term sets and 0/1 term maps. */
    static class Support {

        /**
         * Collects the distinct category terms used by the given entries.
         *
         * @param list entries to scan
         * @return set containing every term that occurs on at least one entry
         */
        public static Set<String> allTerm(List<Entry> list) {
            Set<String> returnSet = new HashSet<String>();
            for (Entry entry : list) {
                for (Category category : entry.getCategories()) {
                    returnSet.add(category.getTerm());
                }
            }
            return returnSet;
        }

        /**
         * Builds, for every entry, a map from each term found by
         * {@link #allTerm(List)} to 1 if the entry has a category with that
         * term and 0 otherwise.
         *
         * <p>The result is a {@code HashMap}; callers that need the list order
         * should iterate the list and look entries up in the map.
         *
         * @param list entries to process
         * @return map from entry to its term/flag map
         */
        public static Map<Entry, Map<String, Integer>> allTerm_b(
                List<Entry> list) {
            // Template holding every known term with the value 0.
            Map<String, Integer> termMap = new HashMap<String, Integer>();
            for (String term : allTerm(list)) {
                termMap.put(term, 0);
            }

            Map<Entry, Map<String, Integer>> map =
                    new HashMap<Entry, Map<String, Integer>>();
            for (Entry entry : list) {
                // Copy the template, then flag the terms this entry carries.
                Map<String, Integer> temp = new HashMap<String, Integer>(termMap);
                for (Category category : entry.getCategories()) {
                    temp.put(category.getTerm(), 1);
                }
                map.put(entry, temp);
            }
            return map;
        }
    }
}
import java.io.IOException;
import java.net.URL;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import com.google.gdata.client.GoogleService;
import com.google.gdata.client.Query;
import com.google.gdata.data.Category;
import com.google.gdata.data.DateTime;
import com.google.gdata.data.Entry;
import com.google.gdata.data.Feed;
import com.google.gdata.util.AuthenticationException;
import com.google.gdata.util.ServiceException;
/**
 * Static helpers that download every post of a Blogger blog through the
 * Google Data (GData) Java client and print the posts together with the
 * terms of their categories (labels).
 */
public class Blogger {

    /**
     * Creates a {@link GoogleService} for the service name "blogger" and sets
     * the given credentials on it.
     *
     * @param user     user name handed to {@code setUserCredentials}
     * @param password password handed to {@code setUserCredentials}
     * @return the service with the credentials set
     * @throws AuthenticationException propagated from {@code setUserCredentials}
     */
    public static GoogleService createGoogleService(String user, String password)
            throws AuthenticationException {
        GoogleService service = new GoogleService("blogger", "deiji-blogger-app");
        service.setUserCredentials(user, password);
        return service;
    }

    /**
     * Fetches all entries of the blog's {@code posts/full} feed, page by page.
     *
     * <p>Each request starts at the index following the entries collected so
     * far; paging stops at the first response that contains no entries.
     *
     * @param service service used to run the queries
     * @param blogId  blog identifier inserted into the feed URL
     * @return all entries, in the order the feed returned them
     * @throws IOException      if the feed URL is malformed or a query fails with an I/O error
     * @throws ServiceException propagated from {@code service.query}
     */
    public static List<Entry> allEntry(GoogleService service, String blogId)
            throws IOException, ServiceException {
        // NOTE(review): the feed is requested over plain http although the
        // service carries credentials - confirm whether https should be used.
        URL feedUrl = new URL("http://www.blogger.com/feeds/" + blogId
                + "/posts/full");
        Query query = new Query(feedUrl);

        // The update-time window is identical for every page, so it is set
        // once instead of on every loop iteration.
        query.setUpdatedMin(DateTime.parseDateTime("1999-12-31T23:59:59"));
        query.setUpdatedMax(DateTime.now());

        List<Entry> returnList = new ArrayList<Entry>();
        int index = 1; // the first requested start index is 1, not 0
        while (true) {
            query.setStartIndex(index);
            Feed resultFeed = service.query(query, Feed.class);
            List<Entry> entries = resultFeed.getEntries();
            if (entries.isEmpty()) {
                break;
            }
            index += entries.size();
            returnList.addAll(entries);
        }
        return returnList;
    }

    /**
     * Prints one tab-separated line per post: published time, id, plain-text
     * title, then the term of each category. A blank line follows the list.
     *
     * @param myService service used to fetch the posts
     * @param blogId    blog identifier
     * @throws ServiceException propagated from {@link #allEntry}
     * @throws IOException      propagated from {@link #allEntry}
     */
    public static void printAllPosts(GoogleService myService, String blogId)
            throws ServiceException, IOException {
        List<Entry> list = allEntry(myService, blogId);
        // order older: the fetched order is reversed (presumably the feed
        // returns the newest post first - confirm)
        Collections.reverse(list);
        final String TAB = "\t";
        for (Entry entry : list) {
            System.out.print(entry.getPublished() + TAB + entry.getId() + TAB
                    + entry.getTitle().getPlainText());
            for (Category category : entry.getCategories()) {
                System.out.print(TAB + category.getTerm());
            }
            System.out.println();
        }
        System.out.println();
    }

    /**
     * Prints one line per post: the plain-text title, a tab, and the post's
     * term map as built by {@link Support#allTerm_b(List)}.
     *
     * <p>Lines are printed in the order of the reversed list. Iterating the
     * {@code HashMap} returned by {@code allTerm_b} directly would discard
     * that order, so each entry is looked up in the map instead.
     *
     * @param myService service used to fetch the posts
     * @param blogId    blog identifier
     * @throws ServiceException propagated from {@link #allEntry}
     * @throws IOException      propagated from {@link #allEntry}
     */
    public static void printTermOfAllPosts(GoogleService myService,
            String blogId) throws ServiceException, IOException {
        List<Entry> list = allEntry(myService, blogId);
        // order older
        Collections.reverse(list);
        final String TAB = "\t";
        Map<Entry, Map<String, Integer>> map = Support.allTerm_b(list);
        for (Entry entry : list) {
            System.out.println(entry.getTitle().getPlainText() + TAB
                    + map.get(entry));
        }
    }

    /** Helpers that turn the categories of entries into term sets and 0/1 term maps. */
    static class Support {

        /**
         * Collects the distinct category terms used by the given entries.
         *
         * @param list entries to scan
         * @return set containing every term that occurs on at least one entry
         */
        public static Set<String> allTerm(List<Entry> list) {
            Set<String> returnSet = new HashSet<String>();
            for (Entry entry : list) {
                for (Category category : entry.getCategories()) {
                    returnSet.add(category.getTerm());
                }
            }
            return returnSet;
        }

        /**
         * Builds, for every entry, a map from each term found by
         * {@link #allTerm(List)} to 1 if the entry has a category with that
         * term and 0 otherwise.
         *
         * <p>The result is a {@code HashMap}; callers that need the list order
         * should iterate the list and look entries up in the map.
         *
         * @param list entries to process
         * @return map from entry to its term/flag map
         */
        public static Map<Entry, Map<String, Integer>> allTerm_b(
                List<Entry> list) {
            // Template holding every known term with the value 0.
            Map<String, Integer> termMap = new HashMap<String, Integer>();
            for (String term : allTerm(list)) {
                termMap.put(term, 0);
            }

            Map<Entry, Map<String, Integer>> map =
                    new HashMap<Entry, Map<String, Integer>>();
            for (Entry entry : list) {
                // Copy the template, then flag the terms this entry carries.
                Map<String, Integer> temp = new HashMap<String, Integer>(termMap);
                for (Category category : entry.getCategories()) {
                    temp.put(category.getTerm(), 1);
                }
                map.put(entry, temp);
            }
            return map;
        }
    }
}
: