فهرست منبع

fix(cuhkpedes): 修复实体添加笔记本中的执行顺序和输出

- 更新了 cuhkpedes_entity_add.ipynb 文件中的执行计数
- 修正了实体添加的输出结果,将 'around,headphones' 改为 'headphones,pants'
Yijun Fu 1 ماه پیش
والد
کامیت
5302a200f4
3 فایلهای تغییر یافته به همراه 2075 افزوده شده و 22 حذف شده
  1. 7 7
      cuhkpedes/cuhkpedes_entity_add.ipynb
  2. 2047 0
      cuhkpedes/cuhkpedes_pair_make.ipynb
  3. 21 15
      cuhkpedes/cuhkpedes_topk_summarize.ipynb

+ 7 - 7
cuhkpedes/cuhkpedes_entity_add.ipynb

@@ -9,7 +9,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": 25,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -32,7 +32,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": 26,
    "metadata": {},
    "outputs": [
     {
@@ -61,7 +61,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 27,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -81,7 +81,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": 28,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -106,7 +106,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 7,
+   "execution_count": 29,
    "metadata": {},
    "outputs": [
     {
@@ -174,7 +174,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 17,
+   "execution_count": 31,
    "metadata": {},
    "outputs": [
     {
@@ -193,7 +193,7 @@
       "0  train_query/p8848_s17661.jpg   \n",
       "\n",
       "                                              entity  \n",
-      "0  man,striped,tank,neck,pair,shoes,around,headph...  \n",
+      "0  man,striped,tank,neck,pair,shoes,headphones,pants  \n",
       "                                            captions           file_path  \\\n",
       "0  the man has short, dark hair and wears khaki p...  CUHK01/0107002.png   \n",
       "\n",

+ 2047 - 0
cuhkpedes/cuhkpedes_pair_make.ipynb

@@ -0,0 +1,2047 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# 添加cuhkpedes每句的配对"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "import random\n",
+    "import pandas as pd\n",
+    "import numpy as np"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# 设置数据集路径"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# 获取主目录\n",
+    "home_directory = os.path.expanduser('~')\n",
+    "dataset_path = os.path.join(home_directory, 'dataset/cross_reid/CUHK-PEDES')\n",
+    "train_file = os.path.join(dataset_path, 'train_entity.csv')\n",
+    "pair_file = os.path.join(dataset_path, 'train_pair.json')"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# 读取 CSV 文件"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "entity_file = pd.read_csv(train_file)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# 构建实体字典"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Done calculating entity dict\n",
+      "hair 17956\n",
+      "person 3023\n",
+      "shoes 22725\n",
+      "sneakers 4958\n",
+      "sweatshirt 867\n",
+      "pedestrian 339\n",
+      "sleeve 3970\n",
+      "pants 17603\n",
+      "man 17240\n",
+      "jacket 6169\n",
+      "hand 4890\n",
+      "shirt 25183\n",
+      "jeans 6731\n",
+      "carrying 11519\n",
+      "backpack 5716\n",
+      "hoodie 652\n",
+      "tennis 3091\n",
+      "sleeved 4396\n",
+      "vest 504\n",
+      "hat 1158\n",
+      "neon 180\n",
+      "logo 560\n",
+      "traffic 4\n",
+      "gear 10\n",
+      "camera 759\n",
+      "officer 21\n",
+      "pair 13372\n",
+      "safety 30\n",
+      "arm 1711\n",
+      "button-up 57\n",
+      "button 1115\n",
+      "facing 581\n",
+      "slacks 871\n",
+      "uniform 104\n",
+      "cap 616\n",
+      "police 23\n",
+      "woman 14571\n",
+      "striped 1485\n",
+      "jogging 58\n",
+      "tote 545\n",
+      "bag 9483\n",
+      "pink/black 3\n",
+      "t-shirt 5388\n",
+      "book 615\n",
+      "haired 751\n",
+      "black/white 83\n",
+      "hands 1299\n",
+      "blouse 1182\n",
+      "shoulder 6611\n",
+      "mall 133\n",
+      "casual 195\n",
+      "indoor 47\n",
+      "security 6\n",
+      "sleeves 2364\n",
+      "well 399\n",
+      "glass 32\n",
+      "capri 456\n",
+      "one 1379\n",
+      "purse 2845\n",
+      "baggy 332\n",
+      "pack 2013\n",
+      "elbows 226\n",
+      "case 168\n",
+      "see 165\n",
+      "straps 656\n",
+      "elbow 304\n",
+      "adult 427\n",
+      "individual 346\n",
+      "trousers 316\n",
+      "complexion 70\n",
+      "glasses 4377\n",
+      "sweater 1646\n",
+      "collar 1147\n",
+      "forward 652\n",
+      "head 1053\n",
+      "visible 590\n",
+      "prescription 136\n",
+      "watch 1055\n",
+      "sideways 40\n",
+      "shoulders 1311\n",
+      "part 64\n",
+      "wrist 866\n",
+      "tshirt 363\n",
+      "swinging 83\n",
+      "shorts 8964\n",
+      "writing 526\n",
+      "printing 44\n",
+      "knee-length 284\n",
+      "worn 358\n",
+      "designs 130\n",
+      "lettering 176\n",
+      "edging 101\n",
+      "boy 980\n",
+      "navy 453\n",
+      "nike 161\n",
+      "socks 1464\n",
+      "clue 6\n",
+      "puffy 98\n",
+      "winter 82\n",
+      "running 462\n",
+      "coat 1605\n",
+      "hood 526\n",
+      "city 14\n",
+      "hoody 17\n",
+      "skinny 413\n",
+      "tight 848\n",
+      "pastel 37\n",
+      "ballet 36\n",
+      "floral 494\n",
+      "slippers 82\n",
+      "focus 5\n",
+      "slip 212\n",
+      "lady 2299\n",
+      "scarf 411\n",
+      "walker 4\n",
+      "medium 463\n",
+      "female 1126\n",
+      "bright-pink 13\n",
+      "braid 26\n",
+      "rear 92\n",
+      "tail 685\n",
+      "denim 1352\n",
+      "detailing 23\n",
+      "ball 98\n",
+      "horizontal 341\n",
+      "panel 33\n",
+      "athletic 520\n",
+      "markings 48\n",
+      "lace 110\n",
+      "sleeveless 926\n",
+      "pony 711\n",
+      "ballcap 4\n",
+      "daisy 5\n",
+      "mesh 14\n",
+      "jean 1806\n",
+      "cream 192\n",
+      "floor 271\n",
+      "tile 12\n",
+      "child 308\n",
+      "sandals 3056\n",
+      "arms 617\n",
+      "flops 657\n",
+      "flip 758\n",
+      "toddler 25\n",
+      "stroller 97\n",
+      "face 397\n",
+      "plain 107\n",
+      "green/yellow 9\n",
+      "blonde 439\n",
+      "strawberry 7\n",
+      "turquoise 116\n",
+      "guy 321\n",
+      "cuffs 118\n",
+      "strap 864\n",
+      "collared 1101\n",
+      "chest 620\n",
+      "brown/black 9\n",
+      "lavender 63\n",
+      "garment 58\n",
+      "bent 426\n",
+      "knees 721\n",
+      "rectangle 10\n",
+      "girl 2046\n",
+      "held 144\n",
+      "phone 1314\n",
+      "looks 785\n",
+      "boxy 36\n",
+      "sandles 68\n",
+      "darker 201\n",
+      "skirt 2619\n",
+      "knee 1190\n",
+      "thick 183\n",
+      "slants 14\n",
+      "souls 4\n",
+      "peasant 6\n",
+      "capris 333\n",
+      "women 439\n",
+      "ponytail 949\n",
+      "ends 72\n",
+      "graphic 484\n",
+      "polo 939\n",
+      "bill 14\n",
+      "barefoot 40\n",
+      "flowery 29\n",
+      "baseball 266\n",
+      "strapped 217\n",
+      "briefcase 205\n",
+      "sides 263\n",
+      "stripped 331\n",
+      "khaki 1481\n",
+      "dress 3652\n",
+      "balding 151\n",
+      "khakis 101\n",
+      "tee 1220\n",
+      "converse 48\n",
+      "male 1665\n",
+      "teens 9\n",
+      "twenties 17\n",
+      "cut 420\n",
+      "messenger 309\n",
+      "hanging 440\n",
+      "light-gray 147\n",
+      "leg 443\n",
+      "couple 47\n",
+      "knapsack 84\n",
+      "words 77\n",
+      "flat 699\n",
+      "pavement 77\n",
+      "stairs 170\n",
+      "cross-shoulder 8\n",
+      "laces 329\n",
+      "walk 91\n",
+      "emblem 73\n",
+      "someone 145\n",
+      "gym 167\n",
+      "bot 5\n",
+      "towards 238\n",
+      "cars 4\n",
+      "undershirt 157\n",
+      "motion 10\n",
+      "track 56\n",
+      "bushes 15\n",
+      "sweat 133\n",
+      "house 12\n",
+      "runs 28\n",
+      "plaid 941\n",
+      "v-neck 160\n",
+      "zipped 54\n",
+      "bottoms 223\n",
+      "bangs 198\n",
+      "tall 333\n",
+      "cardigan 195\n",
+      "dancing 49\n",
+      "charcoal 37\n",
+      "high 888\n",
+      "glitter 3\n",
+      "cutoff 12\n",
+      "boots 1299\n",
+      "cross 202\n",
+      "companion 9\n",
+      "pant 156\n",
+      "prints 16\n",
+      "stretch 17\n",
+      "breaker 10\n",
+      "school 36\n",
+      "yard 10\n",
+      "wind 14\n",
+      "flipflop 26\n",
+      "flop 81\n",
+      "outside 166\n",
+      "slung 327\n",
+      "hip 327\n",
+      "behind 748\n",
+      "cropped 334\n",
+      "shoes.he 313\n",
+      "caprices 3\n",
+      "crop 53\n",
+      "number 75\n",
+      "crossing 146\n",
+      "sunglasses 547\n",
+      "pocket 448\n",
+      "neckline 168\n",
+      "violet 11\n",
+      "standing 1026\n",
+      "maroon 282\n",
+      "stage 61\n",
+      "necklace 185\n",
+      "silver 431\n",
+      "stands 552\n",
+      "chain 36\n",
+      "highlights 20\n",
+      "neck 928\n",
+      "skin 323\n",
+      "buttons 182\n",
+      "v 70\n",
+      "gold 410\n",
+      "pullover 121\n",
+      "short-sleeve 418\n",
+      "blond 132\n",
+      "feet 925\n",
+      "band 179\n",
+      "piece 175\n",
+      "toe 121\n",
+      "colors 104\n",
+      "bracelet 243\n",
+      "matches 33\n",
+      "forearm 58\n",
+      "lapels 12\n",
+      "bluejeans 45\n",
+      "reddish 137\n",
+      "matching 253\n",
+      "handles 76\n",
+      "flats 306\n",
+      "grey/white 17\n",
+      "burgundy 202\n",
+      "wrinkles 31\n",
+      "cheeks 3\n",
+      "portion 19\n",
+      "eyes 33\n",
+      "escalator 116\n",
+      "type 228\n",
+      "heels 618\n",
+      "tie 577\n",
+      "business 178\n",
+      "sports 104\n",
+      "gentleman 177\n",
+      "suit 664\n",
+      "fedora 35\n",
+      "golf 13\n",
+      "shit 43\n",
+      "photo 109\n",
+      "device 67\n",
+      "eye 116\n",
+      "bald 167\n",
+      "flannel 76\n",
+      "middle 288\n",
+      "blurry 117\n",
+      "outdoor 92\n",
+      "walkway 62\n",
+      "blag 4\n",
+      "step 190\n",
+      "folds 29\n",
+      "curved 225\n",
+      "shoes.she 204\n",
+      "crosses 47\n",
+      "waist 660\n",
+      "dark-gray 122\n",
+      "straight 646\n",
+      "pockets 315\n",
+      "checkered 331\n",
+      "pants.he 20\n",
+      "gun 26\n",
+      "overcoat 91\n",
+      "belt 820\n",
+      "zip-up 16\n",
+      "handbag 592\n",
+      "circular 28\n",
+      "kind 87\n",
+      "curves 24\n",
+      "piping 22\n",
+      "tank 788\n",
+      ".he 90\n",
+      "hair.is 6\n",
+      "shorts.with 4\n",
+      "shorts.he 19\n",
+      "name 49\n",
+      "white/grey 9\n",
+      "brand 16\n",
+      "ground 178\n",
+      "trunks 10\n",
+      "swimming 7\n",
+      "downward 60\n",
+      "buttocks 30\n",
+      "pool 7\n",
+      "swim 13\n",
+      "area 144\n",
+      "cup 75\n",
+      "underwear 4\n",
+      "years 12\n",
+      "section 24\n",
+      "platform 52\n",
+      "brunette 67\n",
+      "wrapped 53\n",
+      "sort 103\n",
+      "newspaper 46\n",
+      "rocks 4\n",
+      "baby 149\n",
+      "wrap 29\n",
+      "chair 13\n",
+      "paper 310\n",
+      "bandanna 5\n",
+      "lack 21\n",
+      "mans 23\n",
+      "sweatpants 81\n",
+      "bookbag 98\n",
+      "apart 29\n",
+      "gender 28\n",
+      "peach 117\n",
+      "necktie 20\n",
+      "haircut 95\n",
+      "attire 45\n",
+      "suite 21\n",
+      "teenage 85\n",
+      "folks 21\n",
+      "steps 154\n",
+      "ankle 405\n",
+      "plastic 339\n",
+      "umbrella 538\n",
+      "flip-flops 110\n",
+      "pointing 55\n",
+      "outlines 4\n",
+      "slim 209\n",
+      "carrier 29\n",
+      "draped 157\n",
+      "stride 46\n",
+      "show 41\n",
+      "iphone 5\n",
+      "hose 46\n",
+      "panty 33\n",
+      "trench 77\n",
+      "viewer 33\n",
+      "mid-calf 95\n",
+      "end 103\n",
+      "carry 129\n",
+      "pants.with 5\n",
+      "half 110\n",
+      "way 89\n",
+      "fitting 331\n",
+      "slit 44\n",
+      "caramel 5\n",
+      "come 71\n",
+      "material 26\n",
+      "clothes 188\n",
+      "flight 18\n",
+      "shoulder-length 109\n",
+      "beanie 24\n",
+      "high-top 22\n",
+      "youth 6\n",
+      "girls 38\n",
+      "wording 17\n",
+      "slip-on 62\n",
+      "frame 55\n",
+      "profile 72\n",
+      "stomach 49\n",
+      "bermuda 34\n",
+      "quarter 65\n",
+      "toed 59\n",
+      "brick 40\n",
+      "thigh 171\n",
+      "papers 165\n",
+      "footwear 41\n",
+      "wedge 91\n",
+      "lighter 174\n",
+      "room 106\n",
+      "leggings 988\n",
+      "toes 46\n",
+      "hemline 16\n",
+      "sheer 73\n",
+      "store 81\n",
+      "wear 127\n",
+      "drink 77\n",
+      "drinking 25\n",
+      "foot 284\n",
+      "clothing 243\n",
+      "mirror 6\n",
+      "letters 86\n",
+      "stack 23\n",
+      "folders 3\n",
+      "file 7\n",
+      "dirty 24\n",
+      "costume 23\n",
+      "weapons 4\n",
+      "bicycle 157\n",
+      "cross-body 99\n",
+      "bags 432\n",
+      "lines 117\n",
+      "fanny 56\n",
+      "wristwatch 66\n",
+      "line 117\n",
+      "receding 30\n",
+      "heart 18\n",
+      "animal 33\n",
+      "cartoon 22\n",
+      "cuffed 92\n",
+      "miniskirt 61\n",
+      "tunic 116\n",
+      "pumps 63\n",
+      "tanktop 15\n",
+      "thin 435\n",
+      "mini 91\n",
+      "top.she 8\n",
+      "flowers 144\n",
+      "ear 187\n",
+      "eyeglasses 241\n",
+      "character 22\n",
+      "bear 8\n",
+      "pictures 47\n",
+      "hangs 102\n",
+      "look 78\n",
+      "sport 43\n",
+      "bun 259\n",
+      "snickers 17\n",
+      "polka 131\n",
+      "apir 21\n",
+      "dots 113\n",
+      "hollister 2\n",
+      "resting 54\n",
+      "rests 23\n",
+      "place 21\n",
+      "bra 46\n",
+      "pull 34\n",
+      "tee-shirt 183\n",
+      "hold 47\n",
+      "sandal 29\n",
+      "sky 29\n",
+      "grocery 68\n",
+      "gay 17\n",
+      "boot 23\n",
+      "sideburns 11\n",
+      "details 34\n",
+      "nikes 12\n",
+      "bang 9\n",
+      "fur 133\n",
+      "legs 139\n",
+      "heavy 220\n",
+      ".she 116\n",
+      "set 85\n",
+      "nape 9\n",
+      "graphics 72\n",
+      "crocs 17\n",
+      "royal 56\n",
+      "strip 68\n",
+      "droopy 9\n",
+      "t-short 5\n",
+      "blazer 138\n",
+      "buzz 19\n",
+      "weight 28\n",
+      "shirts 105\n",
+      "loafers 95\n",
+      "ears 63\n",
+      "subway 9\n",
+      "stop 55\n",
+      "decoration 13\n",
+      "blues 19\n",
+      "pinks 9\n",
+      "bottle 234\n",
+      "tights 357\n",
+      "low 251\n",
+      "lime 141\n",
+      "reading 80\n",
+      "slender 167\n",
+      "stitching 16\n",
+      "zip 80\n",
+      "rimmed 64\n",
+      ".. 72\n",
+      "shades 48\n",
+      "bike 114\n",
+      "rim 17\n",
+      "multi 124\n",
+      "aqua 63\n",
+      "greens 7\n",
+      "view 97\n",
+      "travel 16\n",
+      "rust 10\n",
+      "overshirt 25\n",
+      "grey/black 20\n",
+      "eyeglass 36\n",
+      "frames 50\n",
+      "swings 63\n",
+      "gray-and-black 21\n",
+      "openings 8\n",
+      "gown 93\n",
+      "robe 115\n",
+      "level 154\n",
+      "whit 78\n",
+      "flowing 135\n",
+      "shawl 29\n",
+      "angles 6\n",
+      "priest 2\n",
+      "fists 6\n",
+      "pulling 156\n",
+      "base 32\n",
+      "mid 231\n",
+      "turtle 22\n",
+      "v-neckline 30\n",
+      "three-quarter 27\n",
+      "handle 109\n",
+      "clogs 14\n",
+      "zipper 69\n",
+      "blue/grey 6\n",
+      "laptop 73\n",
+      "friend 23\n",
+      "gift 5\n",
+      "chin-length 13\n",
+      "suspenders 33\n",
+      "colorful 219\n",
+      "keys 6\n",
+      "stature 6\n",
+      "duffle 28\n",
+      "message 4\n",
+      "sex 6\n",
+      "group 28\n",
+      "people 106\n",
+      "cover 34\n",
+      "shins 8\n",
+      "portfolio 11\n",
+      "midriff 58\n",
+      "cigarette 19\n",
+      "moccasin 3\n",
+      "suitcase 174\n",
+      "lanyard 46\n",
+      "tag 50\n",
+      "pale 189\n",
+      "pans 43\n",
+      "folder 72\n",
+      "computer 48\n",
+      "car 30\n",
+      "plad 3\n",
+      "cloth 45\n",
+      "creases 7\n",
+      "ruffles 42\n",
+      "balck 39\n",
+      "addition 14\n",
+      "calf 95\n",
+      "stockings 135\n",
+      "pantyhose 70\n",
+      "blacks 12\n",
+      "night 20\n",
+      "evening 13\n",
+      "hoes 26\n",
+      "hills.she 7\n",
+      "wrists 33\n",
+      "sash 32\n",
+      "fluffy 25\n",
+      "furry 27\n",
+      "edge 27\n",
+      "burka 4\n",
+      "wedges.she 10\n",
+      "armpit 5\n",
+      "bit 26\n",
+      "seam 31\n",
+      "road 41\n",
+      "squares 17\n",
+      "hips 132\n",
+      "gloves 145\n",
+      "jackets 23\n",
+      "ivory 18\n",
+      "lightweight 15\n",
+      "background 55\n",
+      "splotches 5\n",
+      "waistband 47\n",
+      "tops 32\n",
+      "shorts.her 6\n",
+      "shot 25\n",
+      "combat 10\n",
+      "red-and-black 14\n",
+      "books 75\n",
+      "heel 207\n",
+      "cutout 5\n",
+      "building 165\n",
+      "office 15\n",
+      "fit 43\n",
+      "form 42\n",
+      "towel 31\n",
+      "bare 58\n",
+      "fingers 36\n",
+      "tattoo 17\n",
+      "letter 20\n",
+      "figure 23\n",
+      "lifts 57\n",
+      "door 28\n",
+      "camo 68\n",
+      "items 62\n",
+      "opaque 11\n",
+      "image 135\n",
+      "bird 11\n",
+      "build 65\n",
+      "horizontally 98\n",
+      "square 106\n",
+      "construction 9\n",
+      "objects 34\n",
+      "leggins 34\n",
+      "shin 12\n",
+      "package 72\n",
+      "talks 23\n",
+      "exercise 18\n",
+      "silky 10\n",
+      "shiny 244\n",
+      "brimmed 27\n",
+      "yellowish 27\n",
+      "wavy 84\n",
+      "sun 77\n",
+      "cotton 36\n",
+      "bow 89\n",
+      "things 20\n",
+      "phone.she 7\n",
+      "tank-top 10\n",
+      "anything 15\n",
+      "rectangular 97\n",
+      "water 173\n",
+      "visor 33\n",
+      "star 11\n",
+      "indeterminate 6\n",
+      "backward 14\n",
+      "yoke 16\n",
+      "satchel 234\n",
+      "apair 19\n",
+      "camouflage 81\n",
+      "decent 36\n",
+      "stair 8\n",
+      "height 89\n",
+      "container 33\n",
+      "garbage 8\n",
+      "w/black 4\n",
+      "surface 17\n",
+      "hairline 12\n",
+      "microphone 50\n",
+      "mouth 93\n",
+      "waistline 18\n",
+      "stairway 9\n",
+      "parcel 12\n",
+      "triangles 6\n",
+      "sihrt 27\n",
+      "workout 11\n",
+      "teal 201\n",
+      "apparel 3\n",
+      "direction 176\n",
+      "sunhat 6\n",
+      "jumper 53\n",
+      "accents 111\n",
+      "string 11\n",
+      "calf-length 15\n",
+      "parka 17\n",
+      "jumpsuit 42\n",
+      "wrinkle 12\n",
+      "short-sleeves 7\n",
+      "cart 65\n",
+      "flap 49\n",
+      "luggage 126\n",
+      "sholder 12\n",
+      "sandels 45\n",
+      "trouser 30\n",
+      "board 28\n",
+      "clip 20\n",
+      "clipboard 13\n",
+      "ties 19\n",
+      "buckles 11\n",
+      "beard 120\n",
+      "flipflops 64\n",
+      "card 18\n",
+      "wallet 41\n",
+      "sign 17\n",
+      "symbols 11\n",
+      "scoop-neck 6\n",
+      "medium-length 35\n",
+      "mauve 19\n",
+      "nude 35\n",
+      "dress.she 10\n",
+      "hair.she 15\n",
+      "n 12\n",
+      "calve 4\n",
+      "taupe 20\n",
+      "sporty 10\n",
+      "drapes 14\n",
+      "shapes 29\n",
+      "shift 16\n",
+      "earrings 36\n",
+      "breast 88\n",
+      "torso 54\n",
+      "strings 18\n",
+      "x 9\n",
+      "shoe 144\n",
+      "outline 30\n",
+      "angle 25\n",
+      "notebook 60\n",
+      "areas 6\n",
+      "binder 16\n",
+      "work 101\n",
+      "documents 6\n",
+      "pantsuit 6\n",
+      "crowd 26\n",
+      "rest 27\n",
+      "concrete 21\n",
+      "hiking 22\n",
+      "paints 20\n",
+      "pant.her 5\n",
+      "ash 9\n",
+      "subject 74\n",
+      "crew 32\n",
+      "student 36\n",
+      "checked 78\n",
+      "cut-off 11\n",
+      "corner 17\n",
+      "kid 88\n",
+      "pushes 30\n",
+      "heals 30\n",
+      "draping 8\n",
+      "motorcycle 15\n",
+      "box 99\n",
+      "sack 36\n",
+      "size 22\n",
+      "window 21\n",
+      "strides 8\n",
+      "red/white 19\n",
+      "backpack.he 12\n",
+      "dis 5\n",
+      "clutch 41\n",
+      "text 20\n",
+      "hallway 28\n",
+      "blue/white 30\n",
+      "skit 6\n",
+      "pink/white 8\n",
+      "book-bag 15\n",
+      "descent 33\n",
+      "boots.she 11\n",
+      "outdoors 43\n",
+      "cobalt 2\n",
+      "yoga 42\n",
+      "drawstring 23\n",
+      "black-and-gray 29\n",
+      "flag 18\n",
+      "bracelets 47\n",
+      "polkadots 6\n",
+      "dot 81\n",
+      "hand.she 8\n",
+      "thighs 43\n",
+      "wall 75\n",
+      "lot 25\n",
+      "cape 31\n",
+      "shows 77\n",
+      "bustier 6\n",
+      "lacy 60\n",
+      "runway 10\n",
+      "crotch 8\n",
+      "counter 20\n",
+      "slide 23\n",
+      "jet 19\n",
+      "fancy 25\n",
+      "magazine 23\n",
+      "article 5\n",
+      "sneaker 21\n",
+      "striping 15\n",
+      "adidas 31\n",
+      "eye-glasses 4\n",
+      "cloak 13\n",
+      "ankles 117\n",
+      "corset 3\n",
+      "interior 4\n",
+      "glassess 7\n",
+      "rims 8\n",
+      "darkly 38\n",
+      "instrument 13\n",
+      "roller 21\n",
+      "bucket 13\n",
+      "cuff 42\n",
+      "sling 32\n",
+      "bands 26\n",
+      "sheets 4\n",
+      "cowboy 21\n",
+      "fashion 18\n",
+      "log 12\n",
+      "reach 131\n",
+      "zippered 40\n",
+      "mint 55\n",
+      "nylon 4\n",
+      "showing 90\n",
+      "brief 42\n",
+      "pencil 29\n",
+      "sheath 12\n",
+      "table 26\n",
+      "somebody 14\n",
+      "round 72\n",
+      "medieval 4\n",
+      "pick 11\n",
+      "flower 99\n",
+      "radio 4\n",
+      "chino 4\n",
+      "metallic 42\n",
+      "leopard 35\n",
+      "wife 14\n",
+      "boat 29\n",
+      "deck 9\n",
+      "family 5\n",
+      "cast 9\n",
+      "curve 26\n",
+      "shadow 14\n",
+      "pocketbook 50\n",
+      ".with 7\n",
+      "bob 38\n",
+      "backwards 97\n",
+      "badge 63\n",
+      "policeman 3\n",
+      "patch 41\n",
+      "pony-tail 9\n",
+      "crossbody 43\n",
+      "destination 8\n",
+      "oval 27\n",
+      "jersey 56\n",
+      "mask 45\n",
+      "nose 26\n",
+      "camisole 21\n",
+      "flouncy 6\n",
+      "cocktail 17\n",
+      "tips 14\n",
+      "hills 6\n",
+      "manilla 3\n",
+      "penny 5\n",
+      "loafer 20\n",
+      "stick 26\n",
+      "pants.she 11\n",
+      "bicycles 33\n",
+      "wash 41\n",
+      "curb 9\n",
+      "inches 43\n",
+      "hosiery 4\n",
+      "drooping 21\n",
+      "backside 15\n",
+      "tube 22\n",
+      "inning 6\n",
+      "center 79\n",
+      "orang 7\n",
+      "shots 16\n",
+      "flops.he 27\n",
+      "move 6\n",
+      "thrown 8\n",
+      "thongs 7\n",
+      "triangular 26\n",
+      "buds 20\n",
+      "quarters 5\n",
+      "accessory 9\n",
+      "hall 20\n",
+      "love 7\n",
+      "indoors 6\n",
+      "men 48\n",
+      "rolling 65\n",
+      "metal 41\n",
+      "seat 8\n",
+      "lunch 14\n",
+      "yellow/black 4\n",
+      "hand.he 7\n",
+      "shadows 10\n",
+      "bouquet 7\n",
+      "crown 6\n",
+      "ribbon 31\n",
+      "teeshirt 8\n",
+      "bows 16\n",
+      "sole 78\n",
+      "bus 16\n",
+      "hair.he 3\n",
+      "wedges 28\n",
+      "wheelie 5\n",
+      "pulls 33\n",
+      "trip 4\n",
+      "music 25\n",
+      "dark/black 7\n",
+      "canvas 58\n",
+      "afro 9\n",
+      "th 10\n",
+      "train 15\n",
+      "wristband 16\n",
+      "crook 12\n",
+      "wheels 23\n",
+      "stops 29\n",
+      "characters 12\n",
+      "circle 24\n",
+      "headphones 94\n",
+      "earbuds 42\n",
+      "duffel 37\n",
+      "basketball 50\n",
+      "insignia 13\n",
+      "bars 8\n",
+      "circles 16\n",
+      "diamond 19\n",
+      "tuxedo 27\n",
+      "scrunchie 11\n",
+      "shimmery 5\n",
+      "curls 8\n",
+      "time 15\n",
+      "period 5\n",
+      "sparkly 22\n",
+      "roses 9\n",
+      "leaves 11\n",
+      "windbreaker 10\n",
+      "choker 4\n",
+      "mid-length 36\n",
+      "read 25\n",
+      "railing 71\n",
+      "grass 37\n",
+      "summer 35\n",
+      "deeply 10\n",
+      "hems 17\n",
+      "drinks 4\n",
+      "party 9\n",
+      "spots 61\n",
+      "mean 5\n",
+      "brim 44\n",
+      "sundress 22\n",
+      "cellphone 133\n",
+      "features 10\n",
+      "males 5\n",
+      "eshoes 5\n",
+      "beverage 24\n",
+      "strips 34\n",
+      "hung 27\n",
+      "knit 37\n",
+      "g 4\n",
+      "trees 18\n",
+      "burns 5\n",
+      "collard 37\n",
+      "camel 12\n",
+      "row 9\n",
+      "forearms 5\n",
+      "word 26\n",
+      "cat 17\n",
+      "stand 21\n",
+      "pendant 18\n",
+      "hi 6\n",
+      "symbol 41\n",
+      "white/black 47\n",
+      "strand 4\n",
+      "art 8\n",
+      "close 72\n",
+      "jeans.she 4\n",
+      "backback 11\n",
+      "violin 2\n",
+      "playing 31\n",
+      "cheerleader 13\n",
+      "females 9\n",
+      "patterns 58\n",
+      "ants 7\n",
+      "compartments 8\n",
+      "criss 7\n",
+      "gauzy 14\n",
+      "frills 6\n",
+      "slipper 11\n",
+      "id 21\n",
+      "individuals 24\n",
+      "pleats 14\n",
+      "holes 31\n",
+      "lawn 8\n",
+      "scoop 56\n",
+      "tablet 30\n",
+      "jack 22\n",
+      "cane 28\n",
+      "red/orange 3\n",
+      "sheet 8\n",
+      "daughter 6\n",
+      "ponytails 8\n",
+      "straw 38\n",
+      "ice 10\n",
+      "cone 6\n",
+      "coffee 24\n",
+      "kaki 28\n",
+      "chubby 10\n",
+      "brownish 42\n",
+      "bridge 7\n",
+      "mobile 28\n",
+      "layers 15\n",
+      "bunch 8\n",
+      "headband 61\n",
+      "drum 7\n",
+      "bath 8\n",
+      "bandage 5\n",
+      "chunky 21\n",
+      "palm 17\n",
+      "blue/black 12\n",
+      "edges 48\n",
+      "hip-covering 15\n",
+      "shes 55\n",
+      "pin 11\n",
+      "hear 15\n",
+      "tattoos 12\n",
+      "sparkles 8\n",
+      "distance 19\n",
+      "curving 21\n",
+      "white/blue 6\n",
+      "faint 13\n",
+      "flops.she 24\n",
+      "doll 3\n",
+      "wit 9\n",
+      "h 8\n",
+      "ocean 7\n",
+      "stance 19\n",
+      "lobby 6\n",
+      "tone 55\n",
+      "plum 20\n",
+      "sweats 17\n",
+      "bottles 5\n",
+      "downwards 25\n",
+      "oxford 13\n",
+      "duck 4\n",
+      "park 17\n",
+      "day 26\n",
+      "strapless 27\n",
+      "fleece 25\n",
+      "someones 8\n",
+      "label 11\n",
+      "camoflauge 7\n",
+      "bowl 17\n",
+      "sneakers.he 4\n",
+      "jeans.his 7\n",
+      "causal 12\n",
+      "mustache 44\n",
+      "ring 21\n",
+      "chin 83\n",
+      "balance 18\n",
+      "magenta 30\n",
+      "feathers 6\n",
+      "dirt 12\n",
+      "hightop 5\n",
+      "model 4\n",
+      "blade 6\n",
+      "stretchy 6\n",
+      "sweetheart 7\n",
+      "envelope 18\n",
+      "document 15\n",
+      "earphones 21\n",
+      "setting 10\n",
+      "pixie 8\n",
+      "collarless 15\n",
+      "mop 11\n",
+      "ray 9\n",
+      "age 28\n",
+      "raglan 12\n",
+      "purchases 4\n",
+      "maxi 11\n",
+      "air 47\n",
+      "eagle 5\n",
+      "tab 4\n",
+      "hit 14\n",
+      "finger 19\n",
+      "fabric 61\n",
+      "tree 21\n",
+      "thought 4\n",
+      "pant.his 9\n",
+      "trainers 7\n",
+      "lilac 12\n",
+      "display 7\n",
+      "iron 2\n",
+      "checks 7\n",
+      "union 6\n",
+      "sections 8\n",
+      "thumb 10\n",
+      "slack 14\n",
+      "apron 73\n",
+      "mail 9\n",
+      "jane 5\n",
+      "smiles 4\n",
+      "selfie 3\n",
+      "leaf 6\n",
+      "sorts 7\n",
+      "indigo 8\n",
+      "field 23\n",
+      "outer 31\n",
+      "headpiece 4\n",
+      "fence 31\n",
+      "bundle 6\n",
+      "bandana 10\n",
+      "screen 19\n",
+      "shelves 4\n",
+      "market 7\n",
+      "woven 8\n",
+      "designer 16\n",
+      "backs 8\n",
+      "pedal 12\n",
+      "pusher 4\n",
+      "ans 25\n",
+      "pace 15\n",
+      "sandals.he 23\n",
+      "fall 41\n",
+      "overlay 10\n",
+      "pure 11\n",
+      "hairs 4\n",
+      "pajamas 4\n",
+      "swoosh 14\n",
+      "rope 6\n",
+      "over-shirt 5\n",
+      "buckle 40\n",
+      "spaghetti 28\n",
+      "glove 13\n",
+      "ruffle 12\n",
+      "think 13\n",
+      "forehead 35\n",
+      "spot 36\n",
+      "mid-stride 5\n",
+      "parts 6\n",
+      "sandals.she 25\n",
+      "stains 5\n",
+      "paint 5\n",
+      "dye 7\n",
+      "path 18\n",
+      "wine 7\n",
+      "description 11\n",
+      "baldness 5\n",
+      "bar 14\n",
+      "satin 3\n",
+      "sirt 6\n",
+      "t-shit 12\n",
+      "below-the-knee 16\n",
+      "lab 13\n",
+      "teen 18\n",
+      "flashlight 6\n",
+      "coloring 6\n",
+      "fruit 4\n",
+      "vehicle 18\n",
+      "zigzag 8\n",
+      "position 12\n",
+      "salmon 32\n",
+      "shirt.with 17\n",
+      "kapris 5\n",
+      "legging 20\n",
+      "gladiator 18\n",
+      "flare 9\n",
+      "thirties 6\n",
+      "zippers 17\n",
+      "moccasins 5\n",
+      "manner 5\n",
+      "mohawk 6\n",
+      "eans 4\n",
+      "shorts.she 6\n",
+      "dar 5\n",
+      "food 15\n",
+      "embellishments 9\n",
+      "advertisement 5\n",
+      "lots 16\n",
+      "rucksack 11\n",
+      "cow 3\n",
+      "letterman 5\n",
+      "trimming 23\n",
+      "pathway 5\n",
+      "tux 6\n",
+      "mini-skirt 14\n",
+      "pole 29\n",
+      "map 7\n",
+      "multi-color 15\n",
+      "ride 5\n",
+      "taxi 6\n",
+      "t-shirts 12\n",
+      "panels 18\n",
+      "jacket.she 4\n",
+      "bag.she 4\n",
+      "moustache 10\n",
+      "dresses 6\n",
+      "sword 13\n",
+      "armour 3\n",
+      "weapon 6\n",
+      "ornaments 4\n",
+      "goatee 12\n",
+      "crosswalk 10\n",
+      "heal 7\n",
+      "army 34\n",
+      "auburn 28\n",
+      "umbrellas 4\n",
+      "flip-flop 11\n",
+      "sand 24\n",
+      "halter 18\n",
+      "plants 11\n",
+      "hunter 6\n",
+      "colour 15\n",
+      "mustard 22\n",
+      "spectacles 6\n",
+      "weather 8\n",
+      "speaking 25\n",
+      "panties 13\n",
+      "flats.she 29\n",
+      "decorations 9\n",
+      "patches 30\n",
+      "back-pack 18\n",
+      "paperwork 11\n",
+      "cranberry 5\n",
+      "numbers 13\n",
+      "script 3\n",
+      "knot 4\n",
+      "flowy 29\n",
+      "handlebars 4\n",
+      "wire 25\n",
+      "earbud 8\n",
+      "rings 10\n",
+      "bread 4\n",
+      "index 6\n",
+      "wool 15\n",
+      "shelf 7\n",
+      "woods 6\n",
+      "pads 6\n",
+      "sticks 5\n",
+      "others 16\n",
+      "beater 8\n",
+      "hes 27\n",
+      "shirt.he 9\n",
+      "booties 13\n",
+      "bowtie 13\n",
+      "images 8\n",
+      "butterfly 9\n",
+      "cute 6\n",
+      "mini-dress 17\n",
+      "hairdo 4\n",
+      "coats 5\n",
+      "kimono 20\n",
+      "soda 21\n",
+      "slant 23\n",
+      "let 10\n",
+      "buildings 11\n",
+      "eshirt 5\n",
+      "brow 4\n",
+      "corridor 11\n",
+      "turtleneck 31\n",
+      "attache 5\n",
+      "blur 9\n",
+      "mixture 6\n",
+      "scooter 9\n",
+      "stone 12\n",
+      "hip-length 32\n",
+      "shoes.l 9\n",
+      "clad 10\n",
+      "son 25\n",
+      "patter 10\n",
+      "cami 7\n",
+      "stilettos 5\n",
+      "adults 26\n",
+      "romper 12\n",
+      "illustrations 4\n",
+      "walkie 5\n",
+      "talkie 5\n",
+      "updo 11\n",
+      "black/pink 6\n",
+      "packages 8\n",
+      "persons 18\n",
+      "handkerchief 4\n",
+      "staircase 10\n",
+      "lace-up 10\n",
+      "watchband 6\n",
+      "ad 21\n",
+      "shouder 2\n",
+      "vans 11\n",
+      "batman 3\n",
+      "mouse 14\n",
+      "checkerboard 10\n",
+      "fist 18\n",
+      "pushers 7\n",
+      "life 3\n",
+      "lenses 3\n",
+      "trash 7\n",
+      "ipod 9\n",
+      "upwards 11\n",
+      "classes 5\n",
+      "flaps 5\n",
+      "epaulets 6\n",
+      "closures 4\n",
+      "transparent 13\n",
+      "vibrant 8\n",
+      "greay 4\n",
+      "ladies 17\n",
+      "bat 6\n",
+      "phone.he 6\n",
+      "skull 15\n",
+      "shite 17\n",
+      "thing 26\n",
+      "cord 20\n",
+      "station 2\n",
+      "photograph 9\n",
+      "tip 4\n",
+      "rain 18\n",
+      "patchwork 9\n",
+      "gingham 9\n",
+      "gas 3\n",
+      "snow 15\n",
+      "rip 4\n",
+      "stain 4\n",
+      "gray/black 4\n",
+      "detail 9\n",
+      "inch 11\n",
+      "nylons 9\n",
+      "poofy 8\n",
+      "levis 5\n",
+      "scrubs 5\n",
+      "hurry 7\n",
+      "summery 10\n",
+      "writings 4\n",
+      "skirt.she 6\n",
+      "photographer 11\n",
+      "holder 13\n",
+      "banding 16\n",
+      "parallel 6\n",
+      "gentlemen 11\n",
+      "braids 11\n",
+      "wig 6\n",
+      "checker 14\n",
+      "tear-drop 6\n",
+      "boxes 9\n",
+      "skater 5\n",
+      "shoelaces 20\n",
+      "environment 5\n",
+      "identification 6\n",
+      "bangle 6\n",
+      "t'shirt 17\n",
+      "blocks 10\n",
+      "block 15\n",
+      "muscle 22\n",
+      "carpet 10\n",
+      "shape 18\n",
+      "gap 4\n",
+      "ankle-length 29\n",
+      "purses 7\n",
+      "illustration 13\n",
+      "inner 9\n",
+      "panda 4\n",
+      "beneath 27\n",
+      "skateboard 6\n",
+      "skates 5\n",
+      "blades 13\n",
+      "buttock 4\n",
+      "department 5\n",
+      "pea 12\n",
+      "pepper 13\n",
+      "salt 12\n",
+      "swung 7\n",
+      "stars 19\n",
+      "rof 14\n",
+      "airport 22\n",
+      "pai 13\n",
+      "e 7\n",
+      "short-shorts 6\n",
+      "multiple 46\n",
+      "smile 7\n",
+      "jacket.with 5\n",
+      "sets 3\n",
+      "backpacks 9\n",
+      "rack 7\n",
+      "overalls 63\n",
+      "pinstripe 4\n",
+      "nothing 17\n",
+      "hang 18\n",
+      "ladder 3\n",
+      "inset 6\n",
+      "rod 14\n",
+      "rubber 22\n",
+      "mittens 3\n",
+      "brace 10\n",
+      "corners 3\n",
+      "mother 6\n",
+      "guitar 15\n",
+      "shirt.she 4\n",
+      "decal 10\n",
+      "hr 6\n",
+      "heather 6\n",
+      "marks 10\n",
+      "dress-shirt 2\n",
+      "sliver 9\n",
+      "sock 16\n",
+      "stole 8\n",
+      "lights 7\n",
+      "crocks 5\n",
+      "comb 7\n",
+      "bikini 15\n",
+      "duke 4\n",
+      "plack 3\n",
+      "pajama 5\n",
+      "diamonds 6\n",
+      "boardwalk 4\n",
+      "desk 6\n",
+      "bulky 21\n",
+      "skulls 9\n",
+      "figures 6\n",
+      "player 2\n",
+      "pieces 5\n",
+      "tie.he 4\n",
+      "pelvis 5\n",
+      "yellow/green 8\n",
+      "elevator 4\n",
+      "sachel 6\n",
+      "groceries 6\n",
+      "green/black 4\n",
+      "sandy 13\n",
+      "class 8\n",
+      "pilot 6\n",
+      "embellishment 5\n",
+      "spring 5\n",
+      "scrub 2\n",
+      "he/she 13\n",
+      "space 9\n",
+      "children 12\n",
+      "marking 5\n",
+      "se 7\n",
+      "briefcases 2\n",
+      "closure 12\n",
+      "dressy 25\n",
+      "tails 13\n",
+      "pigtails 15\n",
+      "suede 12\n",
+      "home 13\n",
+      "hate 3\n",
+      "broom 6\n",
+      "adolescent 4\n",
+      "movie 5\n",
+      "wedding 21\n",
+      "ons 5\n",
+      "rail 25\n",
+      "beret 10\n",
+      "hairstyle 20\n",
+      "pedestrain 4\n",
+      "rainbow 15\n",
+      "entrance 8\n",
+      "pose 11\n",
+      "glases 4\n",
+      "bench 10\n",
+      "jewelry 11\n",
+      "mid-back 8\n",
+      "keychain 3\n",
+      "streaks 3\n",
+      "mark 11\n",
+      "check 15\n",
+      "cheetah 7\n",
+      "his/her 13\n",
+      "beach 11\n",
+      "placket 4\n",
+      "phones 12\n",
+      "hijab 8\n",
+      "headdress 10\n",
+      "carriage 8\n",
+      "ark 4\n",
+      "high-tops 6\n",
+      "infront 4\n",
+      "sleave 10\n",
+      "abdomen 9\n",
+      "pointy 11\n",
+      "high-heels 8\n",
+      "sleevless 14\n",
+      "forest 14\n",
+      "fellow 16\n",
+      "swirl 4\n",
+      "shoulder.her 5\n",
+      "goggles 3\n",
+      "holster 5\n",
+      "heavyset 18\n",
+      "spike 6\n",
+      "opening 24\n",
+      "ramp 5\n",
+      "pad 9\n",
+      "coveralls 24\n",
+      "attention 9\n",
+      "vent 5\n",
+      "bunny 4\n",
+      "caps 3\n",
+      "shop 11\n",
+      "college 15\n",
+      "armor 10\n",
+      "post 7\n",
+      "pouch 13\n",
+      "hoop 7\n",
+      "shaw 4\n",
+      "pass 11\n",
+      "friends 9\n",
+      "poster 9\n",
+      "taller 5\n",
+      "scrunchy 4\n",
+      "cammo 3\n",
+      "silhouette 10\n",
+      "manw 4\n",
+      "pant.she 5\n",
+      "shade 24\n",
+      "mid-thighs 15\n",
+      "waste 8\n",
+      "makeup 4\n",
+      "parking 8\n",
+      "basket 26\n",
+      "points 14\n",
+      "shoulderbag 11\n",
+      "crossover 5\n",
+      "breasts 4\n",
+      "crisscross 7\n",
+      "dinner 3\n",
+      "plunging 10\n",
+      "support 9\n",
+      "bonnet 6\n",
+      "wristbands 2\n",
+      "tale 6\n",
+      "smock 14\n",
+      "tears 6\n",
+      "raincoat 8\n",
+      "tutu 5\n",
+      "browns 9\n",
+      "rise 5\n",
+      "poncho 12\n",
+      "logos 9\n",
+      "york 3\n",
+      "high-heel 10\n",
+      "racket 6\n",
+      "flips 5\n",
+      "melon 6\n",
+      "places 8\n",
+      "inscription 5\n",
+      "teenager 21\n",
+      "backless 14\n",
+      "embroidery 7\n",
+      "mature 8\n",
+      "eyebrows 5\n",
+      "bronze 6\n",
+      "stocking 11\n",
+      "tiger 4\n",
+      "guard 7\n",
+      "cheerleading 5\n",
+      "cheer 7\n",
+      "dance 14\n",
+      "bun.she 3\n",
+      "toy 7\n",
+      "pen 5\n",
+      "bodice 13\n",
+      "tea 7\n",
+      "notepad 3\n",
+      "note 7\n",
+      "instep 5\n",
+      "fun 4\n",
+      "blending 6\n",
+      "bikes 9\n",
+      "dog 16\n",
+      "help 6\n",
+      "shire 3\n",
+      "manila 5\n",
+      "slits 3\n",
+      "drawing 9\n",
+      "doorway 13\n",
+      "throat 3\n",
+      "bell 5\n",
+      "ballerina 5\n",
+      "hie 3\n",
+      "red/black 15\n",
+      "sleaves 4\n",
+      "reds 5\n",
+      "smartphone 11\n",
+      "tans 5\n",
+      "triangle 9\n",
+      "mug 5\n",
+      "use 4\n",
+      "three-quarters 6\n",
+      "forwards 14\n",
+      "lapel 10\n",
+      "sequins 7\n",
+      "event 7\n",
+      "lipstick 8\n",
+      "club 3\n",
+      "contents 3\n",
+      "tray 9\n",
+      "restaurant 3\n",
+      "lips 4\n",
+      "spandex 10\n",
+      "plate 3\n",
+      "beads 7\n",
+      "bomber 6\n",
+      "borders 3\n",
+      "turn 9\n",
+      "bathing 13\n",
+      "point 11\n",
+      "argyle 7\n",
+      "thermos 3\n",
+      "cardboard 8\n",
+      "acid 12\n",
+      "partner 5\n",
+      "ornament 5\n",
+      "handbags 7\n",
+      "capres 3\n",
+      "earring 7\n",
+      "glittery 3\n",
+      "ship 4\n",
+      "characteristics 3\n",
+      "vee 5\n",
+      "frill 4\n",
+      "mens 5\n",
+      "truck 3\n",
+      "fitness 5\n",
+      "paisley 9\n",
+      "cutouts 5\n",
+      "pant.he 4\n",
+      "rifle 15\n",
+      "gate 9\n",
+      "ponytail.she 2\n",
+      "jeans.he 5\n",
+      "cloths 3\n",
+      "wood 4\n",
+      "blush 3\n",
+      "cab 4\n",
+      "statue 4\n",
+      "helmet 42\n",
+      "crutch 5\n",
+      "appearance 6\n",
+      "poker 8\n",
+      "medallion 5\n",
+      "jerkin 3\n",
+      "silk 7\n",
+      "race 6\n",
+      "scene 8\n",
+      "degree 3\n",
+      "jacked 16\n",
+      "tartan 5\n",
+      "blood 3\n",
+      "tones 12\n",
+      "headscarf 4\n",
+      "hand.her 3\n",
+      "pamphlet 3\n",
+      "hobo 3\n",
+      "bag.he 4\n",
+      "peplum 4\n",
+      "hairband 4\n",
+      "calves 22\n",
+      "color.she 3\n",
+      "skirts 5\n",
+      "utility 7\n",
+      "black/grey 6\n",
+      "fan 3\n",
+      "blanket 9\n",
+      "slides 5\n",
+      "wheelchair 7\n",
+      "knife 3\n",
+      "multicolor 7\n",
+      "process 5\n",
+      "posture 4\n",
+      "waits 10\n",
+      "whites 8\n",
+      "expression 4\n",
+      "brown/tan 9\n",
+      "strands 6\n",
+      "bulge 4\n",
+      "rows 5\n",
+      "motor 5\n",
+      "play 4\n",
+      "chocolate 4\n",
+      "sleev 5\n",
+      "gal 10\n",
+      "shies 3\n",
+      "border 19\n",
+      "puffer 4\n",
+      "stiletto 12\n",
+      "grey/blue 10\n",
+      "accordion 4\n",
+      "cold 7\n",
+      "pit 7\n",
+      "packpack 5\n",
+      "midsection 5\n",
+      "accessories 5\n",
+      "date 3\n",
+      "nature 4\n",
+      "accent 12\n",
+      "crate 4\n",
+      "barrette 10\n",
+      "lad 5\n",
+      "pig 9\n",
+      "leotard 6\n",
+      "harness 5\n",
+      "turban 4\n",
+      "vase 5\n",
+      "garb 4\n",
+      "packet 5\n",
+      "sh 4\n",
+      "reminiscent 4\n",
+      "bathrobe 7\n",
+      "notebooks 2\n",
+      "seams 6\n",
+      "apple 3\n",
+      "year 3\n",
+      "hole 3\n",
+      "zebra 13\n",
+      "railings 4\n",
+      "sparkle 3\n",
+      "pleat 3\n",
+      "sea 10\n",
+      "linen 7\n",
+      "messanger 3\n",
+      "goldenrod 8\n",
+      "cement 6\n",
+      "packs 4\n",
+      "genes 5\n",
+      "back.she 4\n",
+      "tissue 2\n",
+      "directions 9\n",
+      "rips 9\n",
+      "fringe 8\n",
+      "shrug 4\n",
+      "feather 5\n",
+      "raspberry 4\n",
+      "sale 7\n",
+      "liquid 3\n",
+      "loop 7\n",
+      "cowl 2\n",
+      "packback 4\n",
+      "p 3\n",
+      "gradient 6\n",
+      "briefecase 5\n",
+      "kilt 4\n",
+      "consisting 6\n",
+      "football 4\n",
+      "hairpiece 3\n",
+      "ovals 5\n",
+      "actress 3\n",
+      "glasses.he 4\n",
+      "wires 4\n",
+      "rectangles 5\n",
+      "cords 5\n",
+      "grays 4\n",
+      "clack 5\n",
+      "la 6\n",
+      "hint 7\n",
+      "jug 6\n",
+      "milk 3\n",
+      "hues 5\n",
+      "worker 8\n",
+      "poles 6\n",
+      "saddles 4\n",
+      "red/pink 5\n",
+      "boys 7\n",
+      "mickey 10\n",
+      "kitty 4\n",
+      "har 2\n",
+      "wound 4\n",
+      "belts 2\n",
+      "ropes 4\n",
+      "call 5\n",
+      "contrast 5\n",
+      "megaphone 3\n",
+      "jump 10\n",
+      "reflector 4\n",
+      "carpi 8\n",
+      "backpack.she 4\n",
+      "sari 4\n",
+      "tigers 4\n",
+      "suv 4\n",
+      "tool 9\n",
+      "jade 4\n",
+      "minnie 2\n",
+      "pangs 4\n",
+      "powder 4\n",
+      "bananas 1\n",
+      "culottes 4\n",
+      "creme 3\n",
+      "hotel 3\n",
+      "equipment 4\n",
+      "karate 3\n",
+      "jacker 3\n",
+      "swimsuit 4\n",
+      "hue 2\n",
+      "armholes 4\n",
+      "location 3\n",
+      "bush 3\n",
+      "veil 3\n",
+      "monkey 2\n",
+      "street.she 4\n",
+      "doctor 4\n",
+      "fold 3\n",
+      "disney 4\n",
+      "checkers 4\n",
+      "specs 4\n",
+      "wardrobe 4\n",
+      "hearts 5\n",
+      "necklaces 9\n",
+      "pale-pink 5\n",
+      "manis 2\n",
+      "stool 2\n",
+      "specks 3\n",
+      "split 7\n",
+      "cases 2\n",
+      "plush 2\n",
+      "charm 3\n",
+      "rag 2\n",
+      "longsleeve 3\n",
+      "amount 2\n",
+      "pierce 3\n",
+      "celtics 5\n",
+      "balloons 2\n",
+      "trophy 6\n"
+     ]
+    }
+   ],
+   "source": [
+    "all_entites = entity_file['entity'].tolist()\n",
+    "\n",
+    "entity_dict = {}\n",
+    "for i, each_entity in enumerate(all_entites):\n",
+    "    each_entity = each_entity.split(',')\n",
+    "    # print(i, each_entity)\n",
+    "    for sub_entity in each_entity:\n",
+    "        if sub_entity not in entity_dict:\n",
+    "            entity_dict[sub_entity] = []\n",
+    "        entity_dict[sub_entity].append(i)\n",
+    "\n",
+    "# # 使用字典推导式过滤掉值的长度为 1 的键值对\n",
+    "# filtered_entity_dict = {k: v for k, v in entity_dict.items() if len(v) > 1}\n",
+    "\n",
+    "print('Done calculating entity dict')\n",
+    "for k, v in entity_dict.items():\n",
+    "    print(k, len(v))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# 配对"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Done sampling pairs\n"
+     ]
+    }
+   ],
+   "source": [
+    "from multiprocessing import Pool, cpu_count\n",
+    "\n",
+    "\n",
+    "def process_entity(i, each_entity, topK, entity_dict):\n",
+    "    each_entity = each_entity.split(',')\n",
+    "    sampled_entity = np.random.choice(each_entity, size=topK, replace=True)\n",
+    "    sampled_pair = []\n",
+    "    for x in sampled_entity:\n",
+    "        available_indices = [idx for idx in entity_dict[x] if idx != i]\n",
+    "        sampled_pair.append(random.choice(available_indices))\n",
+    "    return sampled_pair, sampled_entity.tolist()\n",
+    "\n",
+    "\n",
+    "topK = 10\n",
+    "all_pairs = []\n",
+    "all_paired_entity = []\n",
+    "\n",
+    "# 创建进程池\n",
+    "with Pool(cpu_count()) as pool:\n",
+    "    results = pool.starmap(process_entity, [(i, each_entity, topK, entity_dict) for i, each_entity in enumerate(all_entites)])\n",
+    "\n",
+    "# 收集结果\n",
+    "for sampled_pair, sampled_entity in results:\n",
+    "    all_pairs.append(sampled_pair)\n",
+    "    all_paired_entity.append(sampled_entity)\n",
+    "\n",
+    "print('Done sampling pairs')\n",
+    "# print(all_pairs)\n",
+    "# print(all_paired_entity)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Done constructing pairs\n"
+     ]
+    }
+   ],
+   "source": [
+    "assert len(all_pairs) == len(all_entites) == len(all_paired_entity)\n",
+    "entity_file['pairindex'] = all_pairs\n",
+    "entity_file['pairentity'] = all_paired_entity\n",
+    "entity_file.to_csv(train_file.replace('.csv', '_pair.csv'), index=False)\n",
+    "print('Done constructing pairs')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "ovsegmentor",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.4"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}

+ 21 - 15
cuhkpedes/cuhkpedes_topk_summarize.ipynb

@@ -9,7 +9,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": 16,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -34,7 +34,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": 17,
    "metadata": {},
    "outputs": [
     {
@@ -61,7 +61,7 @@
        "True"
       ]
      },
-     "execution_count": 2,
+     "execution_count": 17,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -93,7 +93,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": 18,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -113,7 +113,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": 19,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -132,7 +132,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": 20,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -149,7 +149,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": 21,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -159,7 +159,7 @@
     "    'long', 'longer', 'longest', 'length', 'side', 'light', 'stripes', 'something', 'tan', 'stripe', 'print',\n",
     "    'picture', 'shopping', 'body', 'design', 'cell', 'color', 'object', 'trim', 'pattern', 'street', 'underneath',\n",
     "    'soles', 'beige', 'sidewalk', 'cargo', 'leather', 'outfit', 'walks', 'hem', 'walking', 'style', 'inside',\n",
-    "    'wears', 'item', 'holding', 'carring', 'bright', 'short'\n",
+    "    'wears', 'item', 'holding', 'carring', 'bright', 'short', 'lining', ''\n",
     "])"
    ]
   },
@@ -172,7 +172,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 7,
+   "execution_count": 22,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -188,7 +188,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 8,
+   "execution_count": 23,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -215,7 +215,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 9,
+   "execution_count": 24,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -225,7 +225,13 @@
     "\n",
     "# 合并结果并更新计数器\n",
     "for nouns in results:\n",
-    "    class_counter.update(nouns)"
+    "    class_counter.update(nouns)\n",
+    "\n",
+    "# Use a dict comprehension to drop classes whose count is 3 or less (only v > 3 is kept)\n",
+    "filtered_class_counter = {k: v for k, v in class_counter.items() if v > 3}\n",
+    "\n",
+    "# 更新 class_counter\n",
+    "class_counter = collections.Counter(filtered_class_counter)"
    ]
   },
   {
@@ -237,7 +243,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 10,
+   "execution_count": 25,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -254,7 +260,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 11,
+   "execution_count": 26,
    "metadata": {},
    "outputs": [
     {
@@ -380,7 +386,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 12,
+   "execution_count": 27,
    "metadata": {},
    "outputs": [
     {