Source code for dhg.data.flickr

from typing import Optional
from functools import partial

from dhg.datapipe import load_from_pickle, norm_ft, to_tensor, to_long_tensor, to_bool_tensor

from .base import BaseData


class Flickr(BaseData):
    r"""The Flickr dataset is a social network dataset for vertex classification task.

    It is a social network where nodes represent users and edges correspond to
    friendships among users. The labels represent the interest groups of the users.

    .. note::
        The L1-normalization for the feature is not recommended for this dataset.

    The content of the Flickr dataset includes the following:

    - ``num_classes``: The number of classes: :math:`9`.
    - ``num_vertices``: The number of vertices: :math:`7,575`.
    - ``num_edges``: The number of edges: :math:`239,738`.
    - ``dim_features``: The dimension of features: :math:`12,047`.
    - ``features``: The vertex feature matrix. ``torch.Tensor`` with size :math:`(7,575 \times 12,047)`.
    - ``edge_list``: The edge list, loaded from ``edge_list.pkl``. Expected to hold ``num_edges`` vertex pairs.
    - ``labels``: The label list. ``torch.LongTensor`` with size :math:`(7,575, )`.

    Args:
        ``data_root`` (``str``, optional): The ``data_root`` has stored the data. If set to ``None``,
            this function will auto-download from server and save into the default directory
            ``~/.dhg/datasets/``. Defaults to ``None``.
    """

    def __init__(self, data_root: Optional[str] = None) -> None:
        """Register the dataset name and describe every content item.

        Scalar entries of ``_content`` are stored directly. File-backed entries
        are dictionaries listing the files they depend on (``upon``, each with
        its expected MD5 checksum), the ``loader`` used to read them and, where
        given, the ``preprocess`` functions to apply to the loaded object.
        How these descriptors are resolved is defined by ``BaseData``.
        """
        super().__init__("Flickr", data_root)
        self._content = {
            "num_classes": 9,
            "num_vertices": 7575,
            # NOTE(review): earlier documentation quoted 479,476 edges, which is
            # exactly twice this value -- presumably each undirected friendship
            # counted in both directions. Confirm against ``edge_list.pkl``.
            "num_edges": 239738,
            "dim_features": 12047,
            "features": {
                "upon": [{"filename": "features.pkl", "md5": "8e889c8532a91ddcb29d6a9c377b5528"}],
                "loader": load_from_pickle,
                # Only tensor conversion is applied: L1 feature normalization
                # (``partial(norm_ft, ord=1)``) is deliberately left out because
                # it is not recommended for this dataset.
                "preprocess": [to_tensor],
            },
            "edge_list": {
                "upon": [{"filename": "edge_list.pkl", "md5": "ea7412a30539fbc95f76ee3712a07017"}],
                "loader": load_from_pickle,
            },
            "labels": {
                "upon": [{"filename": "labels.pkl", "md5": "9603c29e31b863a34fc707b606c02880"}],
                "loader": load_from_pickle,
                "preprocess": [to_long_tensor],
            },
        }